svn commit: r956794 - in /hadoop/pig/trunk: ./ lib/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/ src/org/apache/pig/ba
Author: dvryaboy Date: Tue Jun 22 07:04:41 2010 New Revision: 956794 URL: http://svn.apache.org/viewvc?rev=956794&view=rev Log: PIG-1427: Monitor and kill runaway UDFs Added: hadoop/pig/trunk/lib/guava-r03.jar (with props) hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/util/MonitoredUDFExecutor.java hadoop/pig/trunk/src/org/apache/pig/builtin/MonitoredUDF.java hadoop/pig/trunk/test/org/apache/pig/test/TestMonitoredUDF.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/build.xml hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=956794&r1=956793&r2=956794&view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Jun 22 07:04:41 2010 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-1427: Monitor and kill runaway UDFs (dvryaboy) + PIG-1428: Make a StatusReporter singleton available for incrementing counters (dvryaboy) PIG-972: Make describe work with nested foreach (aniket486 via daijy) Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=956794&r1=956793&r2=956794&view=diff == --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Tue Jun 22 07:04:41 2010 @@ -51,6 +51,7 @@ <property name="hbase.jarfile" value="hbase-0.20.0.jar" /> <property name="hbase.test.jarfile" value="hbase-0.20.0-test.jar" /> <property name="zookeeper.jarfile" value="zookeeper-hbase-1329.jar" /> + <property name="guava.jarfile" value="guava-r03.jar" /> <!-- javac properties --> <property name="javac.debug" value="on" /> @@ -167,6 +168,7 @@ <path refid="compile.classpath"/> <fileset file="${lib.dir}/${hadoop.jarfile}" /> <fileset file="${lib.dir}/${hbase.jarfile}" /> + <fileset file="${lib.dir}/${guava.jarfile}" /> <fileset file="${lib.dir}/${hbase.test.jarfile}" /> 
<fileset file="${lib.dir}/${zookeeper.jarfile}"/> <fileset file="${ivy.lib.dir}/jackson-mapper-asl-${jackson.version}.jar"/> @@ -178,6 +180,7 @@ <!-- javadoc-classpath --> <path id="javadoc-classpath"> <fileset file="${lib.dir}/${hbase.jarfile}" /> + <fileset file="${lib.dir}/${guava.jarfile}" /> <fileset file="${lib.dir}/${hbase.test.jarfile}" /> <path refid="javadoc.classpath"/> </path> Added: hadoop/pig/trunk/lib/guava-r03.jar URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/lib/guava-r03.jar?rev=956794&view=auto == Binary file - no diff available. Propchange: hadoop/pig/trunk/lib/guava-r03.jar -- svn:mime-type = application/octet-stream Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml?rev=956794&r1=956793&r2=956794&view=diff == --- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml (original) +++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml Tue Jun 22 07:04:41 2010 @@ -1229,7 +1229,6 @@ public class IntMax extends EvalFunc&lt; <p>One problem that users run into is when they make assumption about how many times a constructor for their UDF is called. For instance, they might be creating side files in the store function and doing it in the constructor seems like a good idea. The problem with this approach is that in most cases Pig instantiates functions on the client side to, for instance, examine the schema of the data. </p> <p>Users should not make assumptions about how many times a function is instantiated; instead, they should make their code resilient to multiple instantiations. For instance, they could check if the files exist before creating them. </p> - </section> <section> @@ -1250,6 +1249,45 @@ public class IntMax extends EvalFunc&lt; <p>To store information, the UDF calls getUDFProperties. This returns a Properties object which the UDF can record the information in or read the information from. 
To avoid name space conflicts UDFs are required to provide a signature when obtaining a Properties object. This can be done in two ways. The UDF can provide its Class object (via this.getClass()). In this case, every instantiation of the UDF will be given the same Properties object. The UDF can also provide its Class plus an array of Strings. The UDF can pass its constructor arguments, or some other
[Pig Wiki] Update of TuringCompletePig by AlanGates
Dear Wiki user, You have subscribed to a wiki page or wiki category on Pig Wiki for change notification. The TuringCompletePig page has been changed by AlanGates. http://wiki.apache.org/pig/TuringCompletePig?action=diff&rev1=2&rev2=3 -- Thoughts? Preferences for one of the options I did not like? Comments welcome. + == Approach 2 == + And now for something completely different. + + After thinking on the above for a week or so it occurs to me that in dismissing making Pig Latin itself Turing complete I am conflating two tasks + that could be decoupled. The first is defining a grammar for the language and extending the parser. The second is building an execution engine to execute + Pig Latin scripts. It is the second that I am concerned is too much work. Defining the grammar and building the parser is relatively easy (as + we say in the Pig team at Yahoo, parsers are easy). + + So what if we did extend Pig Latin itself to be Turing complete, but the first pass over the language was to compile it down to Java code that made + use of the existing !PigServer class to execute the code? This meets all ten requirements given above (some extra work will need to be done to meet + requirement 8 on up front semantic checking, but it is possible). It deals with my initial concern that supporting Turing completeness in Pig Latin + is too much work. It also has the exceedingly nice feature that we do not have to pick any one scripting language. The more I talked to people the + more I discovered some wanted Python, some Ruby, some Perl, some Groovy, etc. This avoids that problem. And the extensions to Pig Latin themselves + will be simple enough that it should not be onerous for people to learn it. It also means that at some future time if we decide that we want more + control over how the language is executed we can make changes without people needing to switch from whatever scripting language we embed it in. 
+ + A significant downside to this proposal is now users have to have a Java compiler along to run their Pig Latin scripts. + + The other concerns I gave above about making Pig Latin Turing complete are somewhat addressed, but not totally. It would be possible, though + painful, to use a Java debugger on the generated Java code. Syntax highlighting and completion files could be created for Vim, Emacs, Eclipse, and + whatever other favorite editors people have. + + === Specifics === + The grammar of the language should be kept as simple as possible. The goal is not to create a general purpose programming language. + Tasks requiring these features should still be written in UDFs in Java or a scripting language. + + Each Pig Latin file would be considered as a module. All functions would have global scope within that module and would be visible once the module is + imported. + + The type system would be existing Pig Latin types (we may need to add a list type). Types would be bound at run time (this is necessary to support + existing PL grammar where A = load ... is a declaration of A). + + The grammar would look something like: + + {{{ + program: + import + | register + | define + | func_definition + | block + + import: + IMPORT _modulename_ namespace_clause + + namespace_clause: + (empty) + | AS _namespacename_ + + register: + ... // as now + + define: + ... // as now + + func_definition: + DEF _functionname_ ( arg_list ) { block } + // not sure about this, having DEF and DEFINE different keywords. + // May want to reuse DEFINE here or DEFINE FUNCTION + + arg_list: + expr + | arg_list , expr + + block: + statement + | block statement + + statement: + ; + | assignment + | if + | while + | for + | return // only valid inside functions + | CONTINUE ; // only valid inside loops + | BREAK ; // only valid inside loops + | split + | store + | dump + | fs + + assignment: + _var_ = expr ; + | _var_ = LOAD _inputsrc_ ; + ... // GROUP, FILTER, etc. 
as now + + statement_or_block: + statement + | { block } + + if: + IF ( expr ) statement_or_block else + + else: + (empty) + | ELSE statement_or_block + + while: + WHILE ( expr ) statement_or_block + + for: + FOR ( assignment ; expr ; expr ) statement_or_block + + return: + RETURN ; + | RETURN expr ; + + // split, dump, store, fs as now + }}} + + So the example given initially would look like: + {{{ + error = 100.0; + infile = 'original.data'; + outfile = 'result.data'; + while (error > 1.0) { + A = load infile; + B = group A all; + C = foreach B generate flatten(doSomeCalculation(A)) as (result, error); + error = foreach C generate error; + store C into outfile; + if (error > 1.0) fs mv outfile infile; +
[Pig Wiki] Update of TuringCompletePig by AlanGates
Dear Wiki user, You have subscribed to a wiki page or wiki category on Pig Wiki for change notification. The TuringCompletePig page has been changed by AlanGates. http://wiki.apache.org/pig/TuringCompletePig?action=diff&rev1=3&rev2=4 -- Object outfile = new String("result.data"); while (error != null && (Double)error > 1.0) { PigServer ps = new PigServer(); - ps.registerQuery("A = load infile;"); + ps.registerQuery("A = load " + infile + ";"); ps.registerQuery("B = group A all;"); ps.registerQuery("C = foreach B generate flatten(doSomeCalculation(A)) as (result, error);"); ps.registerQuery("error = foreach C generate error;");
svn commit: r957033 - /hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml
Author: rding Date: Tue Jun 22 21:11:27 2010 New Revision: 957033 URL: http://svn.apache.org/viewvc?rev=957033&view=rev Log: fix documentation typo Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml?rev=957033&r1=957032&r2=957033&view=diff == --- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml (original) +++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/udf.xml Tue Jun 22 21:11:27 2010 @@ -1289,6 +1289,7 @@ public class IntMax extends EvalFunc&lt; <p>Currently the MonitoredUDF annotation works with regular and Algebraic UDFs, but has no effect on UDFs that run in the Accumulator mode.</p> </section> +</section> </body> </document>
svn commit: r957046 - in /hadoop/pig/trunk/src/org/apache/pig: builtin/MonitoredUDF.java tools/pigstats/PigStatusReporter.java
Author: rding Date: Tue Jun 22 21:46:57 2010 New Revision: 957046 URL: http://svn.apache.org/viewvc?rev=957046&view=rev Log: fix javadoc warnings Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/MonitoredUDF.java hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStatusReporter.java Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/MonitoredUDF.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/MonitoredUDF.java?rev=957046&r1=957045&r2=957046&view=diff == --- hadoop/pig/trunk/src/org/apache/pig/builtin/MonitoredUDF.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/MonitoredUDF.java Tue Jun 22 21:46:57 2010 @@ -72,7 +72,6 @@ public @interface MonitoredUDF { /** * UDF author can implement a static extension of MonitoredUDFExecutor.ErrorCallback and provide its class * to the annotation in order to perform custom error handling. - * @return */ Class<? extends ErrorCallback> errorCallback() default MonitoredUDFExecutor.ErrorCallback.class; } Modified: hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStatusReporter.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStatusReporter.java?rev=957046&r1=957045&r2=957046&view=diff == --- hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStatusReporter.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStatusReporter.java Tue Jun 22 21:46:57 2010 @@ -34,7 +34,6 @@ public class PigStatusReporter extends S private static PigStatusReporter reporter = null; /** * Get singleton instance of the context - * @param context */ public static PigStatusReporter getInstance() { if (reporter == null) {