svn commit: r706460 - in /incubator/pig/branches/types: CHANGES.txt src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj test/org/apache/pig/test/TestGrunt.java

2008-10-20 Thread olga
Author: olga
Date: Mon Oct 20 16:36:09 2008
New Revision: 706460

URL: http://svn.apache.org/viewvc?rev=706460&view=rev
Log:
PIG-499: parser issue with as

Modified:
incubator/pig/branches/types/CHANGES.txt

incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
incubator/pig/branches/types/test/org/apache/pig/test/TestGrunt.java

Modified: incubator/pig/branches/types/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/incubator/pig/branches/types/CHANGES.txt?rev=706460&r1=706459&r2=706460&view=diff
==
--- incubator/pig/branches/types/CHANGES.txt (original)
+++ incubator/pig/branches/types/CHANGES.txt Mon Oct 20 16:36:09 2008
@@ -291,3 +291,5 @@
 
 PIG-500: Load Func for POCast is not being set in some cases (sms via
 olgan)
+
+PIG-499: parser issue with as (sms via olgan)

Modified: 
incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
URL: 
http://svn.apache.org/viewvc/incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj?rev=706460&r1=706459&r2=706460&view=diff
==
--- 
incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
 (original)
+++ 
incubator/pig/branches/types/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
 Mon Oct 20 16:36:09 2008
@@ -192,8 +192,8 @@
 {
<"'"> {prevState = PIG_START;} : IN_STRING
 |  <"`"> {prevState = PIG_START;} : IN_COMMAND
-|  <(" " | "\t")*["A","a"]["S","s"](" " | "\t")* > {prevState = 
PIG_START;} : SCHEMA_DEFINITION
-|   <(" " | 
"\t")*["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"]("
 " | "\t")* > {prevState = PIG_START;} : GENERATE
+|  <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = 
PIG_START;} : SCHEMA_DEFINITION
+|   <(" " | 
"\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"]("
 " | "\t")+ > {prevState = PIG_START;} : GENERATE
 |   <"{"> {pigBlockLevel = 1;} : IN_BLOCK
 |   <"}"> {if (true) throw new TokenMgrError("Unmatched '}'", 
TokenMgrError.LEXICAL_ERROR);}
 |   <";"> : PIG_END
@@ -289,8 +289,8 @@
  MORE :
 {
<"\""> {prevState = IN_BLOCK;} : IN_STRING
-|  <(" " | "\t")*["A","a"]["S","s"](" " | "\t")* > {prevState = IN_BLOCK;} 
: SCHEMA_DEFINITION
-|   <(" " | 
"\t")*["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"]("
 " | "\t")*> {prevState = IN_BLOCK;} : GENERATE
+|  <(" " | "\t")+["A","a"]["S","s"](" " | "\t")+ > {prevState = IN_BLOCK;} 
: SCHEMA_DEFINITION
+|   <(" " | 
"\t")+["G","g"]["E","e"]["N","n"]["E","e"]["R","r"]["A","a"]["T","t"]["E","e"]("
 " | "\t")+> {prevState = IN_BLOCK;} : GENERATE
 |  <"{"> {pigBlockLevel++;}
 |   <"}"(";")?> {pigBlockLevel--; if (pigBlockLevel == 0) 
SwitchTo(PIG_END);}
 |  <"'"> {prevState = IN_BLOCK;} : IN_STRING

Modified: incubator/pig/branches/types/test/org/apache/pig/test/TestGrunt.java
URL: 
http://svn.apache.org/viewvc/incubator/pig/branches/types/test/org/apache/pig/test/TestGrunt.java?rev=706460&r1=706459&r2=706460&view=diff
==
--- incubator/pig/branches/types/test/org/apache/pig/test/TestGrunt.java 
(original)
+++ incubator/pig/branches/types/test/org/apache/pig/test/TestGrunt.java Mon 
Oct 20 16:36:09 2008
@@ -77,7 +77,7 @@
 PigServer server = new PigServer("MAPREDUCE");
 PigContext context = server.getPigContext();
 
-String strCmd = "a = load 'input1'as (b: bag{t(i: int, c:chararray, f: 
float)});\n";
+String strCmd = "a = load 'input1' as (b: bag{t(i: int, c:chararray, 
f: float)});\n";
 
 ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
 InputStreamReader reader = new InputStreamReader(cmd);
@@ -88,6 +88,25 @@
 }
 
 @Test 
+public void testBagSchemaFail() throws Throwable {
+PigServer server = new PigServer("MAPREDUCE");
+PigContext context = server.getPigContext();
+
+String strCmd = "a = load 'input1'as (b: bag{t(i: int, c:chararray, f: 
float)});\n";
+
+ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
+InputStreamReader reader = new InputStreamReader(cmd);
+
+Grunt grunt = new Grunt(new BufferedReader(reader), context);
+
+try {
+grunt.exec();
+} catch (Exception e) {
+assertTrue(e.getMessage().contains("Encountered \";\""));
+}
+}
+
+@Test 
 public void testBagConstant() throws Throwable {
 PigServer server = new PigServer("MAPREDUCE");
 PigContext context = server.getPigContext();
@@ -207,6 +226,21 @@
 grunt.exec();
 }
 
+@Test 
+public void testParsingWordWithAsInForeachW

[Pig Wiki] Update of "PigErrorHandling" by SanthoshSrinivasan

2008-10-20 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Pig Wiki" for change 
notification.

The following page has been changed by SanthoshSrinivasan:
http://wiki.apache.org/pig/PigErrorHandling

--
- = Error Handling =
+ #format wiki
+ #language en
+ 
+ [[Navigation(children)]]
+ [[TableOfContents]]
  
  This document captures the requirements for the Error Handling feature in Pig.
  
@@ -16, +20 @@

  
  == Background ==
  
- Using the approach mentioned in ref1, Pig can be divided into three 
components for the purpose of error handling. A schematic view of the system is 
illustrated via the diagram.
+ Using the approach mentioned in [#ref1 Mika], Pig can be divided into three 
components for the purpose of error handling. A schematic view of the system is 
illustrated via the diagram.
  
   1. The user interface. This could be the grunt shell or the command line 
execution of a script or using Pig via the Java APIs
   2. Pig
@@ -57, +61 @@

   1. Users will be able to turn warning message aggregation on/off
  
  
- == References: ==
+ == References ==
  
-  1. Mika Raento, "What should Exceptions look like?" July 30, 2006, 
http://www.errorhandling.org/wordpress/
+  1. [[Anchor(ref1)]] Mika Raento, "What should Exceptions look like?" July 
30, 2006, http://www.errorhandling.org/wordpress/
-  2. Bruce Eckel, "Thinking in Java", 3rd Edition Revision 4.0, November 20, 
2002, http://www.faqs.org/docs/think_java/TIJ3_c.htm
+  2. [[Anchor(ref2)]] Bruce Eckel, "Thinking in Java", 3rd Edition Revision 
4.0, November 20, 2002, http://www.faqs.org/docs/think_java/TIJ3_c.htm
  


[Pig Wiki] Update of "PigErrorHandling" by SanthoshSrinivasan

2008-10-20 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Pig Wiki" for change 
notification.

The following page has been changed by SanthoshSrinivasan:
http://wiki.apache.org/pig/PigErrorHandling

--
  
   1. Environment issues: file not found, out of disk space, etc.
   2. Bugs in the software: null pointer exceptions, core dumps, out of bound 
access, etc.
-  3. Programmer error: Syntax errors, divide by zero, incorrect use of casts, 
etc.
+  3. User/Programmer error: Syntax errors, divide by zero, incorrect use of 
casts, etc.
  
  Users rely on the error messages to inform them about the source of the error 
along with a reasonable message that will influence the corrective course of 
action. While most errors cannot be handled in the system, at the least they 
should be reported in a reliable and readable manner.
  
@@ -18, +18 @@

  
  Using the approach mentioned in ref1, Pig can be divided into three 
components for the purpose of error handling. A schematic view of the system is 
illustrated via the diagram.
  
- attachment:SchematicDiagaramOfPig
- 
   1. The user interface. This could be the grunt shell or the command line 
execution of a script or using Pig via the Java APIs
   2. Pig
   3. The backend execution framework, i.e., Hadoop
+ 
+ attachment:Schematic.jpg
  
  Grunt is an interactive shell that allows users to submit Pig commands. The 
command line offers a mechanism for batch mode execution via scripts. The Java 
APIs provide a programmatic mechanism of accessing Pig. Irrespective of the 
mechanism, the control and data flow through Pig which in turn uses Hadoop as 
the execution framework. Errors could occur within each system and across 
systems.
  
@@ -31, +31 @@

  
  === Early error detection ===
  
- Errors that occur in each system should be caught as early as possible. A few 
examples that demonstrate this behavior are: 
+ Errors that occur in each system should be caught as early as possible. Pig 
relies on Hadoop for run time execution. Detection and reporting errors early 
will improve turnaround time by avoiding invoking Hadoop till most errors are 
fixed. A few examples that demonstrate this behavior are: 
  
   1. Syntax errors. E.g.: Missing ';'
   2. Semantic errors. E.g: Mismatch in cogroup arity
   3. Validation errors. E.g: Type mismatch when trying to add a string to an 
integer
- 
- Pig relies on Hadoop for run time execution. Detection and reporting errors 
early will improve turnaround time.
  
  === Error reporting ===
  
@@ -47, +45 @@

  
   1. Users are responsible for purging error logs
   2. Users will be able to switch on/off the detailed error messages on STDERR.
-  3. Since Pig depends on Hadoop for execution, Hadoop error messages will be 
reported by Pig
+  3. Since Pig depends on Hadoop for execution, Hadoop error messages will be 
reported by Pig. An error during execution due to a bug in Pig will be shown 
differently from that of an error in Hadoop itself.
  
  
  === Warning message aggregation ===


[Pig Wiki] Update of "PigErrorHandling" by SanthoshSrinivasan

2008-10-20 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Pig Wiki" for change 
notification.

The following page has been changed by SanthoshSrinivasan:
http://wiki.apache.org/pig/PigErrorHandling

--
  == Background ==
  
  Using the approach mentioned in ref1, Pig can be divided into three 
components for the purpose of error handling. A schematic view of the system is 
illustrated via the diagram.
+ 
+ attachment:SchematicDiagaramOfPig
  
   1. The user interface. This could be the grunt shell or the command line 
execution of a script or using Pig via the Java APIs
   2. Pig


[Pig Wiki] Update of "PigErrorHandling" by SanthoshSrinivasan

2008-10-20 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Pig Wiki" for change 
notification.

The following page has been changed by SanthoshSrinivasan:
http://wiki.apache.org/pig/PigErrorHandling

New page:
= Error Handling =

This document captures the requirements for the Error Handling feature in Pig.

== Introduction ==

Robustness of software is an implicit requirement. Users expect, and rightfully 
so, that software reports the errors that occur during its use clearly and in 
an understandable fashion. The errors encountered while using 
the software could come from multiple sources. Some of the common ones are:

 1. Environment issues: file not found, out of disk space, etc.
 2. Bugs in the software: null pointer exceptions, core dumps, out of bound 
access, etc.
 3. Programmer error: Syntax errors, divide by zero, incorrect use of casts, 
etc.

Users rely on the error messages to inform them about the source of the error 
along with a reasonable message that will influence the corrective course of 
action. While most errors cannot be handled in the system, at the least they 
should be reported in a reliable and readable manner.

== Background ==

Using the approach mentioned in ref1, Pig can be divided into three components 
for the purpose of error handling. A schematic view of the system is 
illustrated via the diagram.

 1. The user interface. This could be the grunt shell or the command line 
execution of a script or using Pig via the Java APIs
 2. Pig
 3. The backend execution framework, i.e., Hadoop

Grunt is an interactive shell that allows users to submit Pig commands. The 
command line offers a mechanism for batch mode execution via scripts. The Java 
APIs provide a programmatic mechanism of accessing Pig. Irrespective of the 
mechanism, the control and data flow through Pig which in turn uses Hadoop as 
the execution framework. Errors could occur within each system and across 
systems.


== Requirements ==

=== Early error detection ===

Errors that occur in each system should be caught as early as possible. A few 
examples that demonstrate this behavior are: 

 1. Syntax errors. E.g.: Missing ';'
 2. Semantic errors. E.g.: Mismatch in cogroup arity
 3. Validation errors. E.g.: Type mismatch when trying to add a string to an 
integer

Pig relies on Hadoop for run time execution. Detecting and reporting errors 
early will improve turnaround time.

=== Error reporting ===

Provide users with readable error messages. Stack traces provide a good 
mechanism to help debugging but do not mean much to the user. Readable and 
simple error messages will be presented on STDERR. Error codes will be devised 
for common error messages. Detailed information like stack trace will be logged 
into client side logs. Users can send logs that contain the details of the 
error like stack trace to assist developers in resolving issues.

'''Note:''' 

 1. Users are responsible for purging error logs
 2. Users will be able to switch on/off the detailed error messages on STDERR.
 3. Since Pig depends on Hadoop for execution, Hadoop error messages will be 
reported by Pig


=== Warning message aggregation ===

With the introduction of types and NULLs into Pig, there are several scenarios 
where Pig warns the user about introduction of casts, divide by zero uses, etc. 
The warning messages are issued on each occurrence of the warning. While this 
message is useful, the increased frequency of the messages is annoying and 
distracts the user from possible error messages. In order to alleviate this 
problem, warning message aggregation will be supported to report the warning 
message and the number of occurrences of the warning message.

'''Note:'''

 1. Users will be able to turn warning message aggregation on/off


== References: ==

 1. Mika Raento, "What should Exceptions look like?" July 30, 2006, 
http://www.errorhandling.org/wordpress/
 2. Bruce Eckel, "Thinking in Java", 3rd Edition Revision 4.0, November 20, 
2002, http://www.faqs.org/docs/think_java/TIJ3_c.htm


[Pig Wiki] Update of "FrontPage" by SanthoshSrinivasan

2008-10-20 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Pig Wiki" for change 
notification.

The following page has been changed by SanthoshSrinivasan:
http://wiki.apache.org/pig/FrontPage

--
* ParameterSubstitution
* PigOptimizationWishList
* NestedLogicalPlan (still draft version)
+   * PigErrorHandling
   * Performance
* PigPerformance (current performance numbers)