svn commit: r901317 - in /hadoop/pig/branches/branch-0.6: CHANGES.txt src/org/apache/pig/impl/logicalLayer/LOCast.java src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java test/org/apache

2010-01-20 Thread gates
Author: gates
Date: Wed Jan 20 18:34:41 2010
New Revision: 901317

URL: http://svn.apache.org/viewvc?rev=901317&view=rev
Log:
PIG-1191: POCast throws exception for certain sequences of LOAD, FILTER, 
FOREACH.  Checking in for Pradeep since he is out.


Modified:
hadoop/pig/branches/branch-0.6/CHANGES.txt

hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java

hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java

hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestTypeCheckingValidator.java

Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=901317&r1=901316&r2=901317&view=diff
==
--- hadoop/pig/branches/branch-0.6/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.6/CHANGES.txt Wed Jan 20 18:34:41 2010
@@ -145,6 +145,9 @@
 
 BUG FIXES
 
+PIG-1191: POCast throws exception for certain sequences of LOAD, FILTER,
+FOREACH (pradeepkth via gates)
+
 PIG-1143: Poisson Sample Loader should compute the number of samples required
 only once (sriranjan via olgan)
 

Modified: 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java?rev=901317&r1=901316&r2=901317&view=diff
==
--- 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java 
(original)
+++ 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java 
Wed Jan 20 18:34:41 2010
@@ -26,6 +26,7 @@
 import org.apache.pig.impl.plan.PlanVisitor;
 import org.apache.pig.impl.plan.VisitorException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
 import org.apache.pig.data.DataType;
 
 public class LOCast extends ExpressionOperator {
@@ -34,6 +35,11 @@
 
 private static final long serialVersionUID = 2L;
 private FuncSpec mLoadFuncSpec = null;
+// store field schema representing the schema 
+// in user specified casts -this is so that if
+// field schema is unset and then getFieldSchema is called we still 
+// rebuild the fieldschema correctly as specified by the user in the script
+private FieldSchema userSpecifiedFieldSchema;
 
 /**
  * 
@@ -65,11 +71,22 @@
 public Schema getSchema() {
 return mSchema;
 }
+
+
+@Override
+public void setFieldSchema(FieldSchema fs) throws FrontendException {
+super.setFieldSchema(fs);
+userSpecifiedFieldSchema = new Schema.FieldSchema(fs);
+}
 
 @Override
 public Schema.FieldSchema getFieldSchema() throws FrontendException {
 if(!mIsFieldSchemaComputed) {
-mFieldSchema = new Schema.FieldSchema(null, mType);
+if(userSpecifiedFieldSchema != null) {
+mFieldSchema = userSpecifiedFieldSchema;
+} else {
+mFieldSchema = new Schema.FieldSchema(null, mType);
+}
 Schema.FieldSchema parFs  = getExpression().getFieldSchema();
 String canonicalName = (parFs != null ? parFs.canonicalName : 
null);
 mFieldSchema.setParent(canonicalName, getExpression());

Modified: 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java?rev=901317&r1=901316&r2=901317&view=diff
==
--- 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java
 (original)
+++ 
hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java
 Wed Jan 20 18:34:41 2010
@@ -37,6 +37,7 @@
  *the logical binary expression operator that has to be visited
  * @throws VisitorException
  */
+@Override
 protected void visit(BinaryExpressionOperator binOp)
 throws VisitorException {
 binOp.unsetFieldSchema();
@@ -49,6 +50,7 @@
  *the logical unary operator that has to be visited
  * @throws VisitorException
  */
+@Override
 protected void visit(UnaryExpressionOperator uniOp) throws 
VisitorException {
 uniOp.unsetFieldSchema();
 super.visit(uniOp);
@@ -60,6 +62,7 @@
  *the logical cogroup operator that has to be visited
  * @throws VisitorException
  */
+@Override
 protected void visit(LOCogroup cg) throws VisitorException {
 cg.unsetSchema();
 super.visit(cg);
@@ -71,6 +74,7 @@
  *the logical sort operator that has to be visited

svn commit: r901333 - in /hadoop/pig/branches/branch-0.6: ./ src/docs/src/documentation/content/xdocs/

2010-01-20 Thread olga
Author: olga
Date: Wed Jan 20 19:03:57 2010
New Revision: 901333

URL: http://svn.apache.org/viewvc?rev=901333&view=rev
Log:
PIG-1192: Pig 0.6 Docs fixes (chandec via olgan)

Modified:
hadoop/pig/branches/branch-0.6/CHANGES.txt

hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/cookbook.xml

hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/index.xml

hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/setup.xml

hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/site.xml

hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/zebra_pig.xml

hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/zebra_users.xml

Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=901333&r1=901332&r2=901333&view=diff
==
--- hadoop/pig/branches/branch-0.6/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.6/CHANGES.txt Wed Jan 20 19:03:57 2010
@@ -26,6 +26,8 @@
 
 IMPROVEMENTS
 
+PIG-1192: Pig 0.6 Docs fixes (chandec via olgan)
+
 PIG-1177: Pig 0.6 Docs - Zebra docs (chandec via olgan)
 
 PIG-1175: Pig 0.6 Docs - Store v. Dump (chandec via olgan)

Modified: 
hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/cookbook.xml
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/cookbook.xml?rev=901333&r1=901332&r2=901333&view=diff
==
--- 
hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/cookbook.xml
 (original)
+++ 
hadoop/pig/branches/branch-0.6/src/docs/src/documentation/content/xdocs/cookbook.xml
 Wed Jan 20 19:03:57 2010
@@ -36,7 +36,7 @@
 
 section
 titleUse Optimization/title
-pPig supports various a 
href=piglatin_users.html#Optimization+Rulesoptimization rules/a which are 
turned on by default. 
+pPig supports various a 
href=piglatin_ref1.html#Optimization+Rulesoptimization rules/a which are 
turned on by default. 
 Become familiar with these rules./p
 /section
 
@@ -220,29 +220,34 @@
 section
 titleSpecialized Join Optimizations/title
 pOptimization can also be achieved using fragment replicate joins, skewed 
joins, and merge joins. 
-For more information see a 
href=piglatin_users.html#Specialized+JoinsSpecialized Joins/a./p
+For more information see a 
href=piglatin_ref1.html#Specialized+JoinsSpecialized Joins/a./p
 /section
 
 /section
 
 
 section
-titleUse the PARALLEL Keyword/title
+titleUse the PARALLEL Clause/title
 
-pPARALLEL controls the number of reducers invoked by Hadoop. The default 
value is 1. However, the number of reducers you need for a particular construct 
in Pig that forms a MapReduce boundary depends entirely on (1) your data and 
the number of intermediate keys you are generating in your mappers  and (2) the 
partitioner and distribution of map (combiner) output keys. In the best cases 
we have seen that a reducer processing about 500 MB of data behaves 
efficiently./p
+pUse the PARALLEL clause to increase the parallelism of a job:/p
+ul
+liPARALLEL sets the number of reduce tasks for the MapReduce jobs generated 
by Pig. The default value is 1 (one reduce task)./li
+liPARALLEL only affects the number of reduce tasks. Map parallelism is 
determined by the input file, one map for each HDFS block. /li
+liIf you don't specify PARALLEL, you still get the same map parallelism 
but only one reduce task./li
+/ul
+p/p
+pAs noted, the default value for PARALLEL is 1 (one reduce task). However, 
the number of reducers you need for a particular construct in Pig that forms a 
MapReduce boundary depends entirely on (1) your data and the number of 
intermediate keys you are generating in your mappers  and (2) the partitioner 
and distribution of map (combiner) output keys. In the best cases we have seen 
that a reducer processing about 500 MB of data behaves efficiently./p
 
-pThe keyword makes sense with any operator that starts a reduce phase. This 
includes  
-a href=piglatin_reference.html#COGROUPCOGROUP/a, 
-a href=piglatin_reference.html#CROSSCROSS/a, 
-a href=piglatin_reference.html#DISTINCTDISTINCT/a, 
-a href=piglatin_reference.html#GROUPGROUP/a, 
-a href=piglatin_reference.html#JOINJOIN/a, 
-a href=piglatin_reference.html#ORDERORDER/a, and 
-a href=piglatin_reference.html#JOIN%2C+OUTEROUTER JOIN/a.
-
-/p
-
-pYou can set the value of PARALLEL in your scripts in conjunction with the 
operator (see the example below). You can also set the value of PARALLEL for 
all scripts using the a href=piglatin_reference.html#setset/a command./p
+pYou can include the PARALLEL clause with any operator that starts a reduce 
phase (see the example below). This includes  
+a href=piglatin_ref2.html#COGROUPCOGROUP/a, 
+a 

svn commit: r901360 [2/2] - in /hadoop/pig/branches/load-store-redesign: contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/math/ contrib/piggybank/java/src/main/java/org/apache/

2010-01-20 Thread rding
Modified: 
hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/PigStorageSchema.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/PigStorageSchema.java?rev=901360&r1=901359&r2=901360&view=diff
==
--- 
hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/PigStorageSchema.java
 (original)
+++ 
hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/PigStorageSchema.java
 Wed Jan 20 20:08:28 2010
@@ -20,27 +20,13 @@
 
 import java.io.IOException;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.pig.ExecType;
-import org.apache.pig.experimental.JsonMetadata;
-import org.apache.pig.experimental.LoadMetadata;
-import org.apache.pig.experimental.StoreMetadata;
-import org.apache.pig.experimental.ResourceSchema;
-import org.apache.pig.experimental.ResourceStatistics;
-import org.apache.pig.StoreConfig;
-import org.apache.pig.backend.datastorage.DataStorage;
-import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
-import org.apache.pig.backend.hadoop.datastorage.HDataStorage;
-import 
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce;
-import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;
+import org.apache.pig.Expression;
+import org.apache.pig.LoadMetadata;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceStatistics;
+import org.apache.pig.StoreMetadata;
 import org.apache.pig.builtin.PigStorage;
-import org.apache.pig.data.DataType;
-import org.apache.pig.impl.io.FileLocalizer;
-import org.apache.pig.impl.logicalLayer.schema.Schema;
-import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
 
 /**
  *  This Load/Store Func reads/writes metafiles that allow the schema and 
@@ -54,9 +40,7 @@
  *  Due to StoreFunc limitations, you can only write the metafiles in 
MapReduce 
  *  mode. You can read them in Local or MapReduce mode.
  */
-public class PigStorageSchema extends PigStorage implements StoreMetadata {
-
-private static final Log log = LogFactory.getLog(PigStorageSchema.class);
+public class PigStorageSchema extends PigStorage implements LoadMetadata, 
StoreMetadata {
 
 public PigStorageSchema() {
 super();
@@ -65,61 +49,50 @@
 public PigStorageSchema(String delim) {
 super(delim);
 }
+ 
+//
+// Implementation of LoadMetaData interface
 
 @Override
-public Schema determineSchema(String fileName, ExecType execType,
-DataStorage storage) throws IOException {
+public ResourceSchema getSchema(String location,
+Configuration conf) throws IOException {
+return (new JsonMetadata()).getSchema(location, conf);
+}
+
+@Override
+public ResourceStatistics getStatistics(String location,
+Configuration conf) throws IOException {
+return null;
+}
 
-// TODO fullPath should be retrieved ia relativeToAbsolutePath once 
PIG-966 is complete
-String fullPath = FileLocalizer.fullPath(fileName, storage);
-LoadMetadata metadataLoader = new JsonMetadata(fullPath, storage);
-ResourceSchema resourceSchema = metadataLoader.getSchema(fullPath, 
null);
-if (resourceSchema == null) {
-return null;
-}
-Schema pigSchema = new Schema();
-for (ResourceSchema.ResourceFieldSchema field : 
resourceSchema.getFields()) {
-FieldSchema pigFieldSchema = DataType.determineFieldSchema(field);
-// determineFieldSchema only sets the types. we also want the 
aliases.
-// TODO this doesn't work properly for complex types
-pigFieldSchema.alias = field.getName();
-pigSchema.add(pigFieldSchema);
-}
-log.info(Loaded Schema: +pigSchema);
-return pigSchema;
+@Override
+public void setPartitionFilter(Expression partitionFilter)
+throws IOException { 
+}
+
+@Override
+public String[] getPartitionKeys(String location, Configuration conf)
+throws IOException {
+return null;
 }
 
+//
+// Implementation of StoreMetadata
+
 @Override
-public void finish() throws IOException {
-super.finish();
-JobConf jobConf = PigMapReduce.sJobConf;
-if(jobConf != null){
-StoreConfig storeConfig = MapRedUtil.getStoreConfig(jobConf);
-DataStorage store = new 

svn commit: r901380 - /hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreMetadata.java

2010-01-20 Thread rding
Author: rding
Date: Wed Jan 20 20:59:46 2010
New Revision: 901380

URL: http://svn.apache.org/viewvc?rev=901380&view=rev
Log:
PIG-1090: Update sources to reflect recent changes in load-store interfaces

Added:

hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreMetadata.java

Added: 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreMetadata.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreMetadata.java?rev=901380&view=auto
==
--- 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreMetadata.java 
(added)
+++ 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreMetadata.java 
Wed Jan 20 20:59:46 2010
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pig;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.pig.ResourceSchema;
+import org.apache.pig.ResourceStatistics;
+
+/**
+ * This interface defines how to write metadata related to data to be stored.
+ * If a given store function does not implement this interface, it will be 
assumed that it
+ * is unable to record metadata about the associated data.
+ */
+
+public interface StoreMetadata {
+
+/**
+ * Store statistics about the data being written.
+ * 
+ * @throws IOException 
+ */
+void storeStatistics(ResourceStatistics stats, String location, 
Configuration conf) throws IOException;
+
+/**
+ * Store schema of the data being written
+ * 
+ * @throws IOException 
+ */
+void storeSchema(ResourceSchema schema, String location, Configuration 
conf) throws IOException;
+}