Author: rohini
Date: Fri Aug 12 21:42:19 2022
New Revision: 1903377

URL: http://svn.apache.org/viewvc?rev=1903377&view=rev
Log:
PIG-5253: Pig Hadoop 3 support (nkollar,szita via rohini)
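At the core of this patch, the bin/pig and pig.py changes below stop requiring a hadoop-core*.jar under HADOOP_HOME and instead probe the installed Hadoop at runtime: they run 'hadoop version', take the first line of output (e.g. "Hadoop 3.1.1"), strip the "Hadoop " prefix, and use the leading digit to pick pig-core-h2.jar or pig-core-h3.jar along with the matching lib/h2 or lib/h3 and hadoop2-runtime or hadoop3-runtime directories. A minimal standalone Java sketch of that probe, for illustration only (it is not part of the patch; it assumes a 'hadoop' executable on the PATH, and the class name is made up):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStreamReader;

    public class HadoopVersionProbe {
        public static void main(String[] args) throws IOException {
            // Same idea as: hadoop version 2>/dev/null | head -1 | sed -e 's/Hadoop //g'
            Process process = new ProcessBuilder("hadoop", "version").start();
            try (BufferedReader out = new BufferedReader(
                    new InputStreamReader(process.getInputStream()))) {
                String firstLine = out.readLine();                 // e.g. "Hadoop 3.1.1"
                if (firstLine == null || !firstLine.startsWith("Hadoop ")) {
                    throw new IOException("could not determine Hadoop version");
                }
                String versionLong = firstLine.substring("Hadoop ".length()); // "3.1.1"
                char major = versionLong.charAt(0);                // '3' -> pig-core-h3.jar
                System.out.println("would use pig-core-h" + major + ".jar"
                        + " and lib/hadoop" + major + "-runtime");
            }
        }
    }

When no hadoop executable is found, the scripts instead fall back to the bundled runtime and recover the major version from the pig-*-core-h*.jar file name (bin/pig via rev/cut, pig.py via a regular expression), as the diff below shows.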
Added: pig/trunk/ivy/libraries-h2.properties pig/trunk/ivy/libraries-h3.properties pig/trunk/test/org/apache/pig/test/MapReduceMiniCluster.java Modified: pig/trunk/CHANGES.txt pig/trunk/bin/pig pig/trunk/bin/pig.py pig/trunk/build.xml pig/trunk/ivy.xml pig/trunk/ivy/libraries.properties pig/trunk/test/org/apache/pig/parser/TestErrorHandling.java pig/trunk/test/org/apache/pig/parser/TestQueryParserUtils.java pig/trunk/test/org/apache/pig/test/MiniCluster.java pig/trunk/test/org/apache/pig/test/MiniGenericCluster.java pig/trunk/test/org/apache/pig/test/SparkMiniCluster.java pig/trunk/test/org/apache/pig/test/TestGrunt.java pig/trunk/test/org/apache/pig/test/TezMiniCluster.java pig/trunk/test/org/apache/pig/test/Util.java pig/trunk/test/org/apache/pig/test/YarnMiniCluster.java Modified: pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/CHANGES.txt (original) +++ pig/trunk/CHANGES.txt Fri Aug 12 21:42:19 2022 @@ -26,6 +26,8 @@ PIG-5282: Upgade to Java 8 (satishsaley IMPROVEMENTS +PIG-5253: Pig Hadoop 3 support (nkollar,szita via rohini) + PIG-5377: Move supportsParallelWriteToStoreLocation from StoreFunc to StoreFuncInterfce (kpriceyahoo via rohini) PIG-5398: SparkLauncher does not read SPARK_CONF_DIR/spark-defaults.conf (knoguchi) Modified: pig/trunk/bin/pig URL: http://svn.apache.org/viewvc/pig/trunk/bin/pig?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/bin/pig (original) +++ pig/trunk/bin/pig Fri Aug 12 21:42:19 2022 @@ -319,15 +319,6 @@ if [[ -z "$HADOOP_HOME" && -n "$HADOOP_B HADOOP_HOME=`dirname $HADOOP_BIN`/.. fi -HADOOP_CORE_JAR=`echo ${HADOOP_HOME}/hadoop-core*.jar` - -if [ -z "$HADOOP_CORE_JAR" ]; then - HADOOP_VERSION=2 -else - echo "Pig requires Hadoop 2 to be present in HADOOP_HOME (currently: $HADOOP_HOME). Please install Hadoop 2.x" - exit 1 -fi - # if using HBase, likely want to include HBase jars and config HBH=${HBASE_HOME:-"${PIG_HOME}/share/hbase"} if [ -d "${HBH}" ]; then @@ -442,9 +433,12 @@ fi # run it if [ -n "$HADOOP_BIN" ]; then if [ "$debug" == "true" ]; then - echo "Find hadoop at $HADOOP_BIN" + echo "Found hadoop at $HADOOP_BIN" fi + HADOOP_VERSION_LONG=`hadoop version 2>/dev/null | head -1 | sed -e 's/Hadoop //g'` + HADOOP_VERSION=`echo "$HADOOP_VERSION_LONG" | cut -c 1` + PIG_JAR=`echo $PIG_HOME/pig*-core-h${HADOOP_VERSION}.jar` # for deb/rpm package, add pig jar in /usr/share/pig @@ -455,7 +449,11 @@ if [ -n "$HADOOP_BIN" ]; then if [ -n "$PIG_JAR" ]; then CLASSPATH=${CLASSPATH}:$PIG_JAR else - echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant jar', and try again" + if [ "$HADOOP_VERSION" == "2" ]; then + echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found Hadoop $HADOOP_VERSION_LONG). Do 'ant clean jar', and try again" + else + echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found Hadoop $HADOOP_VERSION_LONG). Do 'ant -Dhadoopversion=3 clean jar', and try again" + fi exit 1 fi @@ -477,21 +475,26 @@ if [ -n "$HADOOP_BIN" ]; then fi else # use bundled hadoop to run local mode - PIG_JAR=`echo $PIG_HOME/pig*-core-h2.jar` + PIG_JAR=`echo $PIG_HOME/pig*-core-h*.jar` + HADOOP_VERSION=`echo "$PIG_JAR" | rev | cut -c -5 | rev | cut -c 1` if [ -n "$PIG_JAR" ]; then CLASSPATH="${CLASSPATH}:$PIG_JAR" else - echo "Cannot locate pig.jar. 
do 'ant jar', and try again" + if [ "$HADOOP_VERSION" == "2" ]; then + echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found no Hadoop installation). Do 'ant clean jar', and try again" + else + echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found no Hadoop installation). Do 'ant -Dhadoopversion=3 clean jar', and try again" + fi exit 1 fi - for f in $PIG_HOME/lib/h2/*.jar; do + for f in $PIG_HOME/lib/h${HADOOP_VERSION}/*.jar; do CLASSPATH=${CLASSPATH}:$f; done # Add bundled hadoop jars - for f in $PIG_HOME/lib/hadoop2-runtime/*.jar; do + for f in $PIG_HOME/lib/hadoop${HADOOP_VERSION}-runtime/*.jar; do CLASSPATH=${CLASSPATH}:$f; done Modified: pig/trunk/bin/pig.py URL: http://svn.apache.org/viewvc/pig/trunk/bin/pig.py?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/bin/pig.py (original) +++ pig/trunk/bin/pig.py Fri Aug 12 21:42:19 2022 @@ -38,10 +38,12 @@ # when using HBaseStorage +from distutils.spawn import find_executable import sys import os import glob import subprocess +import re debug = False restArgs = [] @@ -124,9 +126,6 @@ except: if 'JAVA_HOME' not in os.environ: sys.exit('Error: JAVA_HOME is not set') -if 'HADOOP_HOME' not in os.environ: - os.environ['HADOOP_HOME'] = os.path.sep + 'usr' - java = os.path.join(os.environ['JAVA_HOME'], 'bin', 'java') javaHeapMax = "-Xmx1000m" @@ -306,7 +305,8 @@ pigOpts += " -Dpig.home.dir=" + os.envir pigJar = "" hadoopBin = "" -print "HADOOP_HOME: %s" % os.path.expandvars(os.environ['HADOOP_HOME']) +hadoopHomePath = None +hadoopPrefixPath = None if (os.environ.get('HADOOP_PREFIX') is not None): print "Found a hadoop prefix" @@ -334,16 +334,18 @@ if (hadoopHomePath is None and hadoopPre if (os.environ.get('HADOOP_HOME') is None and hadoopBin != ""): hadoopHomePath = os.path.join(hadoopBin, "..") -hadoopCoreJars = glob.glob(os.path.join(hadoopHomePath, "hadoop-core*.jar")) -if len(hadoopCoreJars) == 0: - hadoopVersion = 2 -else: - sys.exit("Cannot locate Hadoop 2 binaries, please install Hadoop 2.x and try again.") +if hadoopBin == "": + hadoopBin = find_executable('hadoop') + if hadoopBin != "": if debug == True: print "Find hadoop at %s" % hadoopBin + hadoopVersionQueryResult = subprocess.check_output([hadoopBin, "version"]) + hadoopVersionLong = re.search('Hadoop (.*)',hadoopVersionQueryResult).group(1) + hadoopVersion = hadoopVersionLong[0] + if os.path.exists(os.path.join(os.environ['PIG_HOME'], "pig-core-h$hadoopVersion.jar")): pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h$hadoopVersion.jar") @@ -361,7 +363,10 @@ if hadoopBin != "": if len(pigJars) == 1: pigJar = pigJars[0] else: - sys.exit("Cannot locate pig-core-h2.jar do 'ant jar', and try again") + if (hadoopVersion == "3"): + sys.exit("Cannot locate pig-core-h" + str(hadoopVersion) + ".jar (found Hadoop " + str(hadoopVersionLong) + ") do 'ant clean jar -Dhadoopversion=3', and try again") + if (hadoopVersion == "2"): + sys.exit("Cannot locate pig-core-h" + str(hadoopVersion) + ".jar (found Hadoop " + str(hadoopVersionLong) + ") do 'ant clean jar', and try again") pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h" + str(hadoopVersion), "*.jar")) for jar in pigLibJars: @@ -390,29 +395,28 @@ if hadoopBin != "": else: # fall back to use fat pig.jar if debug == True: - print "Cannot find local hadoop installation, using bundled hadoop 2" + print "Cannot find local hadoop installation, using bundled hadoop" - if os.path.exists(os.path.join(os.environ['PIG_HOME'], 
"pig-core-h2.jar")): - pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar") - else: - pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h2.jar")) + pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h*.jar")) - if len(pigJars) == 1: - pigJar = pigJars[0] + if len(pigJars) == 1: + pigJar = pigJars[0] - elif len(pigJars) > 1: - print "Ambiguity with pig jars found the following jars" - print pigJars - sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar")) - else: - sys.exit("Cannot locate pig-core-h2.jar. do 'ant jar' and try again") + elif len(pigJars) > 1: + print "Ambiguity with pig jars found the following jars" + print pigJars + sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'])) + else: + sys.exit("Cannot locate pig-core-h2.jar. do 'ant jar' and try again") + + hadoopVersion = re.search("pig.*core-h(.)\.jar",pigJar).group(1) - pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h2", "*.jar")) + pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h"+hadoopVersion, "*.jar")) for jar in pigLibJars: classpath += os.pathsep + jar - pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop2-runtime", "*.jar")) + pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop"+hadoopVersion+"-runtime", "*.jar")) for jar in pigLibJars: classpath += os.pathsep + jar Modified: pig/trunk/build.xml URL: http://svn.apache.org/viewvc/pig/trunk/build.xml?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/build.xml (original) +++ pig/trunk/build.xml Fri Aug 12 21:42:19 2022 @@ -78,6 +78,7 @@ <!-- artifact jar file names --> <property name="artifact.pig.jar" value="${final.name}.jar"/> <property name="artifact.pig-h2.jar" value="${final.name}-h2.jar"/> + <property name="artifact.pig-h3.jar" value="${final.name}-h3.jar"/> <property name="artifact.pig-sources.jar" value="${final.name}-sources.jar"/> <property name="artifact.pig-javadoc.jar" value="${final.name}-javadoc.jar"/> <property name="artifact.pig.tar" value="${final.name}.tar.gz"/> @@ -85,12 +86,15 @@ <!-- jar names. TODO we might want to use the svn reversion name in the name in case it is a dev version --> <property name="output.jarfile.withouthadoop" value="${build.dir}/${final.name}-withouthadoop.jar" /> <property name="output.jarfile.withouthadoop-h2" value="${legacy.dir}/${final.name}-withouthadoop-h2.jar" /> + <property name="output.jarfile.withouthadoop-h3" value="${legacy.dir}/${final.name}-withouthadoop-h3.jar" /> <property name="output.jarfile.core" value="${build.dir}/${artifact.pig.jar}" /> <property name="output.jarfile.core-h2" value="${build.dir}/${artifact.pig-h2.jar}" /> + <property name="output.jarfile.core-h3" value="${build.dir}/${artifact.pig-h3.jar}" /> <property name="output.jarfile.sources" value="${build.dir}/${artifact.pig-sources.jar}" /> <property name="output.jarfile.javadoc" value="${build.dir}/${artifact.pig-javadoc.jar}" /> <!-- Maintain old pig.jar in top level directory. 
--> <property name="output.jarfile.backcompat-core-h2" value="${basedir}/${final.name}-core-h2.jar" /> + <property name="output.jarfile.backcompat-core-h3" value="${basedir}/${final.name}-core-h3.jar" /> <!-- test properties --> <condition property="test.exec.type" value="${exectype}" else="mr"> @@ -157,10 +161,8 @@ <target name="setTezEnv"> <propertyreset name="test.timeout" value="900000" /> - <propertyreset name="hadoopversion" value="2" /> - <propertyreset name="isHadoop2" value="true" /> - <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" /> - <propertyreset name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" /> + <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop2" /> + <propertyreset name="src.shims.test.dir" value="${basedir}/shims/test/hadoop2" /> <propertyreset name="test.exec.type" value="tez" /> </target> @@ -207,7 +209,6 @@ <property name="ivy.repo.dir" value="${user.home}/ivyrepo" /> <property name="ivy.dir" location="ivy" /> <property name="loglevel" value="quiet" /> - <loadproperties srcfile="${ivy.dir}/libraries.properties" /> <!-- Hadoop master version @@ -227,6 +228,10 @@ <equals arg1="${hadoopversion}" arg2="2"/> </condition> + <condition property="isHadoop3"> + <equals arg1="${hadoopversion}" arg2="3"/> + </condition> + <!-- HBase master version (Value 95 is translated for backward compatibility in old build scripts) @@ -247,8 +252,10 @@ <equals arg1="${sparkversion}" arg2="1"/> </condition> - <property name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" /> - <property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" /> + <loadproperties srcfile="${ivy.dir}/libraries.properties"/> + + <property name="src.shims.dir" value="${basedir}/shims/src/hadoop2" /> + <property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop2" /> <property name="src.hive.shims.dir" value="${basedir}/shims/src/hive${hiveversion}" /> <property name="asfrepo" value="https://repository.apache.org"/> @@ -710,6 +717,8 @@ <antcall target="copySparkDependencies"/> <antcall target="copyh2Dependencies"/> <antcall target="copyHadoop2LocalRuntimeDependencies" /> + <antcall target="copyh3Dependencies"/> + <antcall target="copyHadoop3LocalRuntimeDependencies" /> </target> <target name="copyCommonDependencies"> @@ -757,8 +766,8 @@ <fileset dir="${ivy.lib.dir.spark}" includes="*.jar"/> </copy> </target> - - <target name="copyh2Dependencies" if="isHadoop2"> + + <target name="copyh2Dependencies" unless="isHadoop3"> <mkdir dir="${lib.dir}/h2" /> <copy todir="${lib.dir}/h2"> <fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/> @@ -773,7 +782,7 @@ <move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h2}"/> </target> - <target name="copyHadoop2LocalRuntimeDependencies"> + <target name="copyHadoop2LocalRuntimeDependencies" unless="isHadoop3"> <mkdir dir="${lib.dir}/hadoop2-runtime" /> <copy todir="${lib.dir}/hadoop2-runtime"> <fileset dir="${ivy.lib.dir}" includes="hadoop-*.jar"/> @@ -790,6 +799,44 @@ </copy> </target> + <target name="copyh3Dependencies" if="isHadoop3"> + <mkdir dir="${lib.dir}/h3" /> + <copy todir="${lib.dir}/h3"> + <fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="hive-shims-0.*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="hbase-hadoop2*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="tez-*.jar"/> + <fileset dir="${ivy.lib.dir}" 
includes="commons-collections4-*.jar"/> + </copy> + <copy file="${output.jarfile.core}" tofile="${output.jarfile.backcompat-core-h3}"/> + <mkdir dir="${legacy.dir}" /> + <move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h3}"/> + </target> + + <target name="copyHadoop3LocalRuntimeDependencies" if="isHadoop3"> + <mkdir dir="${lib.dir}/hadoop3-runtime" /> + <copy todir="${lib.dir}/hadoop3-runtime"> + <fileset dir="${ivy.lib.dir}" includes="hadoop-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-cli-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-configuration-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-collections-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-lang-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-codec-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-io-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-logging-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="httpclient-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="httpcore-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="log4j-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="slf4j-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="re2j-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="woodstox-core-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="stax2-api-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="commons-configuration2-*.jar"/> + <fileset dir="${ivy.lib.dir}" includes="jackson-*.jar"/> + </copy> + </target> + + <scriptdef name="propertyreset" language="javascript" description="Allows to assign @{property} new value"> <attribute name="name"/> @@ -825,12 +872,25 @@ <jar update="yes" jarfile="${output.jarfile.core}"> <zipfileset src="${basedir}/_pig-shims.jar" includes="**/Spark2*.class"/> </jar> - <jar update="yes" jarfile="${output.jarfile.backcompat-core-h2}"> - <zipfileset src="${basedir}/_pig-shims.jar" includes="**/Spark2*.class"/> - </jar> - <jar update="yes" jarfile="${output.jarfile.withouthadoop-h2}"> - <zipfileset src="${basedir}/_pig-shims.jar" includes="**/Spark2*.class"/> - </jar> + <if> + <equals arg1="${isHadoop2}" arg2="true" /> + <then> + <jar update="yes" jarfile="${output.jarfile.backcompat-core-h2}"> + <zipfileset src="${basedir}/_pig-shims.jar" includes="**/Spark2*.class"/> + </jar> + <jar update="yes" jarfile="${output.jarfile.withouthadoop-h2}"> + <zipfileset src="${basedir}/_pig-shims.jar" includes="**/Spark2*.class"/> + </jar> + </then> + <else> + <jar update="yes" jarfile="${output.jarfile.backcompat-core-h3}"> + <zipfileset src="${basedir}/_pig-shims.jar" includes="**/Spark2*.class"/> + </jar> + <jar update="yes" jarfile="${output.jarfile.withouthadoop-h3}"> + <zipfileset src="${basedir}/_pig-shims.jar" includes="**/Spark2*.class"/> + </jar> + </else> + </if> <delete file="${basedir}/_pig-shims.jar"/> </target> @@ -1009,13 +1069,6 @@ <target name="test-core-mrtez" description="run core tests on both mr and tez mode" depends="setWindowsPath,setLinuxPath,compile-test,debugger.check,jackson-pig-3039-test-download"> - <fail message="hadoopversion must be set to 2 when invoking test-core-mrtez"> - <condition> - <not> - <equals arg1="${hadoopversion}" arg2="2" /> - </not> - </condition> - </fail> <echo message="=======================" /> <echo message="Running MR tests" /> <echo message="=======================" /> @@ -1113,7 +1166,6 @@ <!-- ================================================================== --> <!-- Distribution --> <!-- 
================================================================== --> - <target name="package" depends="jar, docs, api-report, piggybank" description="Create a Pig tar release"> <package-base/> </target> @@ -1134,6 +1186,7 @@ </copy> <copy file="${output.jarfile.backcompat-core-h2}" tofile="${tar.dist.dir}/${final.name}-core-h2.jar" failonerror="false"/> + <copy file="${output.jarfile.backcompat-core-h3}" tofile="${tar.dist.dir}/${final.name}-core-h3.jar" failonerror="false"/> <copy todir="${tar.dist.dir}/lib" file="contrib/piggybank/java/piggybank.jar"/> @@ -1731,6 +1784,7 @@ </target> <target name="ivy-resolve" depends="ivy-init" unless="ivy.resolved" description="Resolve Ivy dependencies"> + <loadproperties srcfile="${ivy.dir}/libraries-h${hadoopversion}.properties"/> <property name="ivy.resolved" value="true"/> <echo>*** Ivy resolve with Hadoop ${hadoopversion}, Spark ${sparkversion}, HBase ${hbaseversion}, Hive ${hiveversion} ***</echo> <ivy:resolve log="${loglevel}" settingsRef="${ant.project.name}.ivy.settings" conf="compile"/> Modified: pig/trunk/ivy.xml URL: http://svn.apache.org/viewvc/pig/trunk/ivy.xml?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/ivy.xml (original) +++ pig/trunk/ivy.xml Fri Aug 12 21:42:19 2022 @@ -39,6 +39,7 @@ <conf name="checkstyle" visibility="private"/> <conf name="buildJar" extends="compile,test" visibility="private"/> <conf name="hadoop2" visibility="private"/> + <conf name="hadoop3" visibility="private"/> <conf name="hbase1" visibility="private"/> <conf name="hbase2" visibility="private"/> <conf name="spark1" visibility="private" /> @@ -62,13 +63,13 @@ <dependency org="commons-beanutils" name="commons-beanutils-core" rev="${commons-beanutils.version}" conf="checkstyle->master"/> <dependency org="com.sun.jersey" name="jersey-bundle" rev="${jersey.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="com.sun.jersey" name="jersey-server" rev="${jersey.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="com.sun.jersey.contribs" name="jersey-guice" rev="${jersey.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="commons-codec" name="commons-codec" rev="${commons-codec.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="commons-el" name="commons-el" rev="${commons-el.version}" conf="compile->master"/> <dependency org="commons-io" name="commons-io" rev="${commons-io.version}" @@ -82,84 +83,102 @@ <artifact name="httpdlog-pigloader" m:classifier="udf"/> </dependency> <dependency org="commons-configuration" name="commons-configuration" rev="${commons-configuration.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="commons-collections" name="commons-collections" rev="${commons-collections.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="javax.servlet" name="servlet-api" rev="${servlet-api.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="javax.ws.rs" name="jsr311-api" rev="${jsr311-api.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="com.google.protobuf" name="protobuf-java" rev="${protobuf-java.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="javax.inject" 
name="javax.inject" rev="${javax-inject.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="javax.xml.bind" name="jaxb-api" rev="${jaxb-api.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="com.sun.xml.bind" name="jaxb-impl" rev="${jaxb-impl.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="com.google.inject" name="guice" rev="${guice.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="com.google.inject.extensions" name="guice-servlet" rev="${guice-servlet.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="aopalliance" name="aopalliance" rev="${aopalliance.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.glassfish" name="javax.el" rev="${glassfish.el.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="log4j" name="log4j" rev="${log4j.version}" conf="compile->master"/> + <dependency org="com.google.re2j" name="re2j" rev="${re2j.version}" + conf="hadoop3->master" /> + <dependency org="com.codahale.metrics" name="metrics-core" rev="${codahale.metrics-core.version}" + conf="hadoop3->master"/> + + <!-- Hadoop 2/3 dependencies --> <dependency org="org.apache.hadoop" name="hadoop-annotations" - rev="${hadoop-common.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-auth" - rev="${hadoop-common.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-common" - rev="${hadoop-common.version}" conf="hadoop2->master"> + rev="${hadoop-common.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-auth" + rev="${hadoop-common.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-common" + rev="${hadoop-common.version}" conf="hadoop2->master;hadoop3->default"> <artifact name="hadoop-common" ext="jar" /> <artifact name="hadoop-common" type="tests" ext="jar" m:classifier="tests" /> </dependency> <dependency org="org.apache.hadoop" name="hadoop-hdfs" - rev="${hadoop-hdfs.version}" conf="hadoop2->master"> + rev="${hadoop-hdfs.version}" conf="hadoop2->master;hadoop3->default"> <artifact name="hadoop-hdfs" ext="jar" /> <artifact name="hadoop-hdfs" type="tests" ext="jar" m:classifier="tests" /> </dependency> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master"> - <artifact name="hadoop-mapreduce-client-jobclient" ext="jar" /> - <artifact name="hadoop-mapreduce-client-jobclient" type="tests" ext="jar" m:classifier="tests"/> - <exclude org="commons-daemon" module="commons-daemon"/><!--bad POM--> - <exclude org="org.apache.commons" module="commons-daemon"/><!--bad POM--> + conf="hadoop2->master;hadoop3->master"> + <artifact name="hadoop-mapreduce-client-jobclient" ext="jar" /> + <artifact name="hadoop-mapreduce-client-jobclient" type="tests" ext="jar" m:classifier="tests"/> + <exclude org="commons-daemon" module="commons-daemon"/><!--bad POM--> + <exclude org="org.apache.commons" module="commons-daemon"/><!--bad POM--> </dependency> <dependency org="org.apache.hadoop" 
name="hadoop-yarn-server-tests" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master"> + conf="hadoop2->master;hadoop3->master"> <artifact name="hadoop-yarn-server-tests" type="jar" m:classifier="tests"/> </dependency> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-app" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master" /> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-shuffle" rev="${hadoop-mapreduce.version}" - conf="hadoop2->master" /> - <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-api" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-common" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-server" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-server-web-proxy" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-server-common" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-server-nodemanager" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-common" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-api" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-common" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-server" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-server-web-proxy" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-server-common" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-server-nodemanager" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->default"> + </dependency> <dependency org="org.apache.hadoop" name="hadoop-yarn-server-resourcemanager" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-client" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-yarn-server-applicationhistoryservice" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> - <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-hs" - rev="${hadoop-mapreduce.version}" conf="hadoop2->master"/> + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-client" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-yarn-server-applicationhistoryservice" + rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-hs" + 
rev="${hadoop-mapreduce.version}" conf="hadoop2->master;hadoop3->master"/> + + <!--Hadoop 3 dependencies--> + <dependency org="org.apache.hadoop" name="hadoop-hdfs-client" + rev="${hadoop-hdfs.version}" conf="hadoop3->master"> + <artifact name="hadoop-hdfs-client" ext="jar" /> + <artifact name="hadoop-hdfs-client" type="tests" ext="jar" m:classifier="tests" /> + </dependency> + + <dependency org="org.apache.hadoop" name="hadoop-yarn-server-timelineservice" + rev="${hadoop-mapreduce.version}" conf="hadoop3->master"/> + <dependency org="commons-logging" name="commons-logging" rev="${commons-logging.version}" conf="compile->master;checkstyle->master"/> <dependency org="org.slf4j" name="slf4j-log4j12" rev="${slf4j-log4j12.version}" @@ -172,7 +191,7 @@ <exclude org="org.codehaus.jackson" module="jackson-mapper-asl"/> </dependency> <dependency org="org.apache.avro" name="avro-mapred" rev="${avro.version}" - conf="hadoop2->default;checkstyle->master"> + conf="hadoop2->default;checkstyle->master;hadoop3->master"> <artifact name="avro-mapred" type="jar" m:classifier="hadoop2"/> <exclude org="org.codehaus.jackson" module="jackson-core-asl"/> <exclude org="org.codehaus.jackson" module="jackson-mapper-asl"/> @@ -477,13 +496,11 @@ <dependency org="com.lmax" name="disruptor" rev="3.3.6" conf="hbase2->master"/> <!-- End of HBase dependencies --> - <dependency org="org.htrace" name="htrace-core" rev="3.0.4" conf="hadoop2->master"/> - <dependency org="org.apache.htrace" name="htrace-core" rev="${htrace.version}" conf="hadoop2->master"/> + <dependency org="org.htrace" name="htrace-core" rev="3.0.4" conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.htrace" name="htrace-core" rev="${htrace.version}" + conf="hadoop2->master;hadoop3->master;hbase1->master"/> <dependency org="org.fusesource.leveldbjni" name="leveldbjni-all" rev="${leveldbjni.version}" - conf="hadoop2->master"/> - <dependency org="org.cloudera.htrace" name="htrace-core" rev="2.00" conf="hbase1->master"> - <artifact name="htrace-core" type="jar"/> - </dependency> + conf="hadoop2->master;hadoop3->master"/> <!-- for TestHBaseStorage --> <dependency org="org.apache.hbase" name="hbase-procedure" rev="${hbase1.version}" conf="test->master"/> @@ -491,7 +508,7 @@ conf="test->default"/> <!-- Dependency for Accumulo{Input,Output}Format --> - <dependency org="org.apache.accumulo" name="accumulo-core" rev="${accumulo15.version}" conf="compile->default"> + <dependency org="org.apache.accumulo" name="accumulo-core" rev="${accumulo.version}" conf="compile->default"> <exclude org="com.google.guava" module="guava"/> <exclude org="commons-codec" module="commons-codec"/> <exclude org="commons-collections" module="commons-collections"/> @@ -508,7 +525,7 @@ </dependency> <!-- Used for 'functional' Accumulo tests --> - <dependency org="org.apache.accumulo" name="accumulo-minicluster" rev="${accumulo15.version}" conf="compile->default"> + <dependency org="org.apache.accumulo" name="accumulo-minicluster" rev="${accumulo.version}" conf="hadoop2->default"> <exclude org="com.google.guava" module="guava"/> <exclude org="commons-codec" module="commons-codec"/> <exclude org="commons-collections" module="commons-collections"/> @@ -524,6 +541,12 @@ <exclude org="org.slf4j" module="slf4j-log4j12"/> </dependency> + <dependency org="org.apache.accumulo" name="accumulo-minicluster" rev="${accumulo.version}" conf="hadoop3->master"/> + <dependency org="org.apache.accumulo" name="accumulo-server-base" rev="${accumulo.version}" conf="hadoop3->master"/> + 
<dependency org="org.apache.accumulo" name="accumulo-tserver" rev="${accumulo.version}" conf="hadoop3->master"/> + <dependency org="org.apache.accumulo" name="accumulo-master" rev="${accumulo.version}" conf="hadoop3->master"/> + <dependency org="org.apache.accumulo" name="accumulo-gc" rev="${accumulo.version}" conf="hadoop3->master"/> + <!-- for piggybank --> <dependency org="org.hsqldb" name="hsqldb" rev="${hsqldb.version}" conf="test->default" /> @@ -560,7 +583,6 @@ <dependency org="org.apache.hive.shims" name="hive-shims-0.23" rev="${hive.version}" changing="true" conf="hive3->master" /> - <dependency org="org.apache.orc" name="orc-core" rev="${orc.version}" changing="true" conf="hive3->default" /> <dependency org="org.apache.hive" name="hive-storage-api" rev="${hive-storage-api.version}" changing="true" conf="hive3->master" /> <dependency org="org.iq80.snappy" name="snappy" rev="${snappy.version}" @@ -617,31 +639,33 @@ <!-- for Tez integration --> <dependency org="org.apache.tez" name="tez" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.tez" name="tez-common" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.tez" name="tez-api" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.tez" name="tez-dag" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.tez" name="tez-runtime-internals" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.tez" name="tez-runtime-library" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.tez" name="tez-mapreduce" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.tez" name="tez-yarn-timeline-history-with-acls" rev="${tez.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.commons" name="commons-collections4" rev="${commons-collections4.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.codehaus.jettison" name="jettison" rev="${jettison.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.commons" name="commons-math3" rev="${commons-math3.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.curator" name="curator-framework" rev="${curator.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> <dependency org="org.apache.curator" name="curator-client" rev="${curator.version}" - conf="hadoop2->master"/> + conf="hadoop2->master;hadoop3->master"/> + <dependency org="org.apache.curator" name="curator-recipes" rev="${curator.version}" + conf="hadoop2->master;hadoop3->master"/> <!-- For dependency check --> <dependency org="org.owasp" name="dependency-check-ant" rev="${dependency-check-ant.version}" conf="owasp->default"/> Added: pig/trunk/ivy/libraries-h2.properties URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries-h2.properties?rev=1903377&view=auto ============================================================================== --- pig/trunk/ivy/libraries-h2.properties (added) +++ pig/trunk/ivy/libraries-h2.properties Fri Aug 12 21:42:19 2022 @@ -0,0 
+1,23 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#This properties file lists the versions of the various artifacts used by pig and components. +#It drives ivy and the generation of a maven POM + +#These are the versions of our Hadoop 2 dependencies (core and other libs separated and in alphabetical order) +hadoop-common.version=2.7.3 +hadoop-hdfs.version=2.7.3 +hadoop-mapreduce.version=2.7.3 + +accumulo.version=1.5.0 +netty.version=3.6.6.Final +netty-all.version=4.0.23.Final \ No newline at end of file Added: pig/trunk/ivy/libraries-h3.properties URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries-h3.properties?rev=1903377&view=auto ============================================================================== --- pig/trunk/ivy/libraries-h3.properties (added) +++ pig/trunk/ivy/libraries-h3.properties Fri Aug 12 21:42:19 2022 @@ -0,0 +1,25 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#This properties file lists the versions of the various artifacts used by pig and components. +#It drives ivy and the generation of a maven POM + +#These are the versions of our Hadoop 3 dependencies (core and other libs separated and in alphabetical order) +hadoop-common.version=3.1.1 +hadoop-hdfs.version=3.1.1 +hadoop-mapreduce.version=3.1.1 + +accumulo.version=1.6.0 +netty.version=3.10.5.Final +codahale.metrics-core.version=3.0.1 +netty-all.version=4.1.0.Beta5 +re2j.version=1.0 \ No newline at end of file Modified: pig/trunk/ivy/libraries.properties URL: http://svn.apache.org/viewvc/pig/trunk/ivy/libraries.properties?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/ivy/libraries.properties (original) +++ pig/trunk/ivy/libraries.properties Fri Aug 12 21:42:19 2022 @@ -13,8 +13,7 @@ #This properties file lists the versions of the various artifacts used by pig and components. 
#It drives ivy and the generation of a maven POM -#These are the versions of our dependencies (in alphabetical order) -accumulo15.version=1.5.0 +#These are the versions of our common dependencies (in alphabetical order) apacheant.version=1.7.1 apacherat.version=0.8 automaton.version=1.11-8 @@ -35,9 +34,6 @@ checkstyle.version=4.2 ivy.version=2.2.0 groovy.version=2.4.5 guava.version=11.0 -hadoop-common.version=2.7.3 -hadoop-hdfs.version=2.7.3 -hadoop-mapreduce.version=2.7.3 hbase1.version=1.2.4 hbase2.version=2.0.0 hsqldb.version=2.4.0 @@ -95,7 +91,7 @@ parquet-pig-bundle.version=1.9.0 snappy.version=0.2 leveldbjni.version=1.8 curator.version=2.6.0 -htrace.version=3.1.0-incubating +htrace.version=3.2.0-incubating htrace4.version=4.0.1-incubating commons-lang3.version=3.6 scala-xml.version=1.0.5 Modified: pig/trunk/test/org/apache/pig/parser/TestErrorHandling.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestErrorHandling.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/parser/TestErrorHandling.java (original) +++ pig/trunk/test/org/apache/pig/parser/TestErrorHandling.java Fri Aug 12 21:42:19 2022 @@ -134,9 +134,10 @@ public class TestErrorHandling { try { pig.registerQuery( query ); } catch(FrontendException ex) { - System.out.println( ex.getCause().getMessage() ); - Assert.assertTrue( ex.getCause().getMessage().contains( "line 2, column 0" ) ); - Assert.assertTrue( ex.getCause().getMessage().contains( "No FileSystem for scheme: fs2you" ) ); + String message = ex.getCause().getMessage(); + Assert.assertTrue( message.contains( "line 2, column 0" ) ); + Assert.assertTrue( message.contains( "No FileSystem for scheme: fs2you" ) + || message.contains( "No FileSystem for scheme \"fs2you\"" )); return; } Assert.fail( "Testcase should fail" ); Modified: pig/trunk/test/org/apache/pig/parser/TestQueryParserUtils.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/parser/TestQueryParserUtils.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/parser/TestQueryParserUtils.java (original) +++ pig/trunk/test/org/apache/pig/parser/TestQueryParserUtils.java Fri Aug 12 21:42:19 2022 @@ -60,13 +60,13 @@ public class TestQueryParserUtils { assertEquals(null, props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); // Same host different scheme - QueryParserUtils.setHdfsServers("hftp://nn1/tmp", pc); - assertEquals("hftp://nn1", props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); - QueryParserUtils.setHdfsServers("hftp://nn1:50070/tmp", pc); - assertEquals("hftp://nn1,hftp://nn1:50070", props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); + QueryParserUtils.setHdfsServers("swebhdfs://nn1/tmp", pc); + assertEquals("swebhdfs://nn1", props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); + QueryParserUtils.setHdfsServers("swebhdfs://nn1:50070/tmp", pc); + assertEquals("swebhdfs://nn1,swebhdfs://nn1:50070", props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); // There should be no duplicates - QueryParserUtils.setHdfsServers("hftp://nn1:50070/tmp", pc); - assertEquals("hftp://nn1,hftp://nn1:50070", props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); + QueryParserUtils.setHdfsServers("swebhdfs://nn1:50070/tmp", pc); + assertEquals("swebhdfs://nn1,swebhdfs://nn1:50070", props.getProperty(MRConfiguration.JOB_HDFS_SERVERS)); // har 
props.remove(MRConfiguration.JOB_HDFS_SERVERS); Added: pig/trunk/test/org/apache/pig/test/MapReduceMiniCluster.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/MapReduceMiniCluster.java?rev=1903377&view=auto ============================================================================== --- pig/trunk/test/org/apache/pig/test/MapReduceMiniCluster.java (added) +++ pig/trunk/test/org/apache/pig/test/MapReduceMiniCluster.java Fri Aug 12 21:42:19 2022 @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.test; + +import org.apache.pig.ExecType; +import org.apache.pig.backend.hadoop.executionengine.Launcher; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher; + +public class MapReduceMiniCluster extends YarnMiniCluster { + + public MapReduceMiniCluster(int dataNodeCount, int nodeManagerCount) { + super(dataNodeCount, nodeManagerCount); + } + + @Override + public ExecType getExecType() { + return ExecType.MAPREDUCE; + } + + static public Launcher getLauncher() { + return new MapReduceLauncher(); + } + + @Override + protected void setConfigOverrides() { + m_mr_conf.setInt(MRConfiguration.SUMIT_REPLICATION, 2); + m_mr_conf.setInt(MRConfiguration.MAP_MAX_ATTEMPTS, 2); + m_mr_conf.setInt(MRConfiguration.REDUCE_MAX_ATTEMPTS, 2); + m_mr_conf.setInt("pig.jobcontrol.sleep", 100); + } +} Modified: pig/trunk/test/org/apache/pig/test/MiniCluster.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/MiniCluster.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/MiniCluster.java (original) +++ pig/trunk/test/org/apache/pig/test/MiniCluster.java Fri Aug 12 21:42:19 2022 @@ -1,134 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.pig.test; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.filecache.DistributedCache; -import org.apache.hadoop.mapreduce.v2.MiniMRYarnCluster; -import org.apache.pig.ExecType; -import org.apache.pig.backend.hadoop.executionengine.Launcher; -import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration; -import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher; - -/** - * This class builds a single instance of itself with the Singleton - * design pattern. While building the single instance, it sets up a - * mini cluster that actually consists of a mini DFS cluster and a - * mini MapReduce cluster on the local machine and also sets up the - * environment for Pig to run on top of the mini cluster. - */ -public class MiniCluster extends MiniGenericCluster { - private static final File CONF_DIR = new File("build/classes"); - private static final File CONF_FILE = new File(CONF_DIR, "hadoop-site.xml"); - - protected MiniMRYarnCluster m_mr = null; - private Configuration m_dfs_conf = null; - private Configuration m_mr_conf = null; - - @Override - public ExecType getExecType() { - return ExecType.MAPREDUCE; - } - - @Override - protected void setupMiniDfsAndMrClusters() { - try { - final int dataNodes = 4; // There will be 4 data nodes - final int taskTrackers = 4; // There will be 4 task tracker nodes - - System.setProperty("hadoop.log.dir", "build/test/logs"); - // Create the dir that holds hadoop-site.xml file - // Delete if hadoop-site.xml exists already - CONF_DIR.mkdirs(); - if(CONF_FILE.exists()) { - CONF_FILE.delete(); - } - - // Builds and starts the mini dfs and mapreduce clusters - Configuration config = new Configuration(); - config.set("yarn.scheduler.capacity.root.queues", "default"); - config.set("yarn.scheduler.capacity.root.default.capacity", "100"); - config.set("yarn.scheduler.capacity.maximum-am-resource-percent", "0.1"); - m_dfs = new MiniDFSCluster(config, dataNodes, true, null); - m_fileSys = m_dfs.getFileSystem(); - m_dfs_conf = m_dfs.getConfiguration(0); - - //Create user home directory - m_fileSys.mkdirs(m_fileSys.getWorkingDirectory()); - - m_mr = new MiniMRYarnCluster("PigMiniCluster", taskTrackers); - m_mr.init(m_dfs_conf); - m_mr.start(); - - // Write the necessary config info to hadoop-site.xml - m_mr_conf = new Configuration(m_mr.getConfig()); - - m_conf = m_mr_conf; - m_conf.set(FileSystem.FS_DEFAULT_NAME_KEY, m_dfs_conf.get(FileSystem.FS_DEFAULT_NAME_KEY)); - m_conf.unset(MRConfiguration.JOB_CACHE_FILES); - - m_conf.setInt(MRConfiguration.IO_SORT_MB, 50); - m_conf.set(MRConfiguration.CHILD_JAVA_OPTS, "-Xmx384m"); - m_conf.setInt(MRJobConfig.MAP_MEMORY_MB, 512); - m_conf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512); - m_conf.set(MRJobConfig.MR_AM_COMMAND_OPTS, "-Xmx384m"); - m_conf.setInt(MRJobConfig.MR_AM_VMEM_MB, 512); - - m_conf.setInt(MRConfiguration.SUMIT_REPLICATION, 2); - m_conf.setInt(MRConfiguration.MAP_MAX_ATTEMPTS, 2); - m_conf.setInt(MRConfiguration.REDUCE_MAX_ATTEMPTS, 2); - m_conf.set("dfs.datanode.address", "0.0.0.0:0"); - m_conf.set("dfs.datanode.http.address", "0.0.0.0:0"); - m_conf.set("pig.jobcontrol.sleep", "100"); - m_conf.writeXml(new FileOutputStream(CONF_FILE)); - 
m_fileSys.copyFromLocalFile(new Path(CONF_FILE.getAbsoluteFile().toString()), - new Path("/pigtest/conf/hadoop-site.xml")); - DistributedCache.addFileToClassPath(new Path("/pigtest/conf/hadoop-site.xml"), m_conf); - - System.err.println("XXX: Setting " + FileSystem.FS_DEFAULT_NAME_KEY + " to: " + m_conf.get(FileSystem.FS_DEFAULT_NAME_KEY)); - // Set the system properties needed by Pig - System.setProperty("cluster", m_conf.get(MRConfiguration.JOB_TRACKER)); - System.setProperty("namenode", m_conf.get(FileSystem.FS_DEFAULT_NAME_KEY)); - System.setProperty("junit.hadoop.conf", CONF_DIR.getPath()); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - protected void shutdownMiniMrClusters() { - // Delete hadoop-site.xml on shutDown - if(CONF_FILE.exists()) { - CONF_FILE.delete(); - } - if (m_mr != null) { m_mr.stop(); } - m_mr = null; - } - - static public Launcher getLauncher() { - return new MapReduceLauncher(); - } -} Modified: pig/trunk/test/org/apache/pig/test/MiniGenericCluster.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/MiniGenericCluster.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/MiniGenericCluster.java (original) +++ pig/trunk/test/org/apache/pig/test/MiniGenericCluster.java Fri Aug 12 21:42:19 2022 @@ -50,34 +50,41 @@ abstract public class MiniGenericCluster public static String EXECTYPE_TEZ = "tez"; public static String EXECTYPE_SPARK = "spark"; + private static final int DEFAULT_DATANODE_COUNT = 2; + private static final int DEFAULT_NODEMANAGER_COUNT = 2; + /** * Returns the single instance of class MiniGenericCluster that represents * the resources for a mini dfs cluster and a mini mr (or tez) cluster. The * system property "test.exec.type" is used to decide whether a mr or tez mini * cluster will be returned. 
*/ - public static MiniGenericCluster buildCluster() { + public static MiniGenericCluster buildCluster(int dataNodeCount, int nodeManagerCount) { if (INSTANCE == null) { String execType = System.getProperty("test.exec.type"); if (execType == null) { // Default to MR System.setProperty("test.exec.type", EXECTYPE_MR); - return buildCluster(EXECTYPE_MR); + return buildCluster(EXECTYPE_MR, dataNodeCount, nodeManagerCount); } - return buildCluster(execType); + return buildCluster(execType, dataNodeCount, nodeManagerCount); } return INSTANCE; } - public static MiniGenericCluster buildCluster(String execType) { + public static MiniGenericCluster buildCluster() { + return buildCluster(DEFAULT_DATANODE_COUNT, DEFAULT_NODEMANAGER_COUNT); + } + + public static MiniGenericCluster buildCluster(String execType, int dataNodeCount, int nodeManagerCount) { if (INSTANCE == null) { if (execType.equalsIgnoreCase(EXECTYPE_MR)) { - INSTANCE = new MiniCluster(); + INSTANCE = new MapReduceMiniCluster(dataNodeCount, nodeManagerCount); } else if (execType.equalsIgnoreCase(EXECTYPE_TEZ)) { - INSTANCE = new TezMiniCluster(); + INSTANCE = new TezMiniCluster(dataNodeCount, nodeManagerCount); } else if (execType.equalsIgnoreCase(EXECTYPE_SPARK)) { - INSTANCE = new SparkMiniCluster(); + INSTANCE = new SparkMiniCluster(dataNodeCount, nodeManagerCount); } else { throw new RuntimeException("Unknown test.exec.type: " + execType); } @@ -89,6 +96,10 @@ abstract public class MiniGenericCluster return INSTANCE; } + public static MiniGenericCluster buildCluster(String execType) { + return buildCluster(execType, DEFAULT_DATANODE_COUNT, DEFAULT_NODEMANAGER_COUNT); + } + abstract public ExecType getExecType(); abstract protected void setupMiniDfsAndMrClusters(); @@ -157,7 +168,7 @@ abstract public class MiniGenericCluster System.setProperty("test.exec.type", EXECTYPE_MR); } if (execType.equalsIgnoreCase(EXECTYPE_MR)) { - return MiniCluster.getLauncher(); + return MapReduceMiniCluster.getLauncher(); } else if (execType.equalsIgnoreCase(EXECTYPE_TEZ)) { return TezMiniCluster.getLauncher(); } else if(execType.equalsIgnoreCase(EXECTYPE_SPARK)){ Modified: pig/trunk/test/org/apache/pig/test/SparkMiniCluster.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/SparkMiniCluster.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/SparkMiniCluster.java (original) +++ pig/trunk/test/org/apache/pig/test/SparkMiniCluster.java Fri Aug 12 21:42:19 2022 @@ -26,10 +26,13 @@ import org.apache.pig.backend.hadoop.exe public class SparkMiniCluster extends YarnMiniCluster { - private static final Log LOG = LogFactory - .getLog(SparkMiniCluster.class); + private static final Log LOG = LogFactory.getLog(SparkMiniCluster.class); private ExecType spark = new SparkExecType(); + public SparkMiniCluster(int dataNodeCount, int nodeManagerCount) { + super(dataNodeCount, nodeManagerCount); + } + @Override public ExecType getExecType() { return spark; Modified: pig/trunk/test/org/apache/pig/test/TestGrunt.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestGrunt.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/TestGrunt.java (original) +++ pig/trunk/test/org/apache/pig/test/TestGrunt.java Fri Aug 12 21:42:19 2022 @@ -72,7 +72,7 @@ import org.junit.BeforeClass; import org.junit.Test; public 
class TestGrunt { - static MiniGenericCluster cluster = MiniGenericCluster.buildCluster(); + static MiniGenericCluster cluster = MiniGenericCluster.buildCluster(2, 4); private String basedir = "test/org/apache/pig/test/data"; @BeforeClass Modified: pig/trunk/test/org/apache/pig/test/TezMiniCluster.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TezMiniCluster.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/TezMiniCluster.java (original) +++ pig/trunk/test/org/apache/pig/test/TezMiniCluster.java Fri Aug 12 21:42:19 2022 @@ -41,6 +41,10 @@ public class TezMiniCluster extends Yarn private static final ExecType TEZ = new TezExecType(); + public TezMiniCluster(int dataNodeCount, int nodeManagerCount) { + super(dataNodeCount, nodeManagerCount); + } + @Override public ExecType getExecType() { return TEZ; Modified: pig/trunk/test/org/apache/pig/test/Util.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/Util.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/Util.java (original) +++ pig/trunk/test/org/apache/pig/test/Util.java Fri Aug 12 21:42:19 2022 @@ -1480,7 +1480,7 @@ public class Util { } public static String findPigJarName() { - final String suffix = System.getProperty("hadoopversion").equals("20") ? "1" : "2"; + final String suffix = System.getProperty("hadoopversion", "2"); File baseDir = new File("."); String[] jarNames = baseDir.list(new FilenameFilter() { @Override Modified: pig/trunk/test/org/apache/pig/test/YarnMiniCluster.java URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/YarnMiniCluster.java?rev=1903377&r1=1903376&r2=1903377&view=diff ============================================================================== --- pig/trunk/test/org/apache/pig/test/YarnMiniCluster.java (original) +++ pig/trunk/test/org/apache/pig/test/YarnMiniCluster.java Fri Aug 12 21:42:19 2022 @@ -37,11 +37,17 @@ public abstract class YarnMiniCluster ex protected static final File MAPRED_CONF_FILE = new File(CONF_DIR, "mapred-site.xml"); protected static final File YARN_CONF_FILE = new File(CONF_DIR, "yarn-site.xml"); - protected Configuration m_dfs_conf = null; protected MiniMRYarnCluster m_mr = null; protected Configuration m_mr_conf = null; + protected final int dataNodeCount; + protected final int nodeManagerCount; + + public YarnMiniCluster(int dataNodeCount, int nodeManagerCount) { + this.dataNodeCount = dataNodeCount; + this.nodeManagerCount = nodeManagerCount; + } @Override protected void setupMiniDfsAndMrClusters() { @@ -52,7 +58,7 @@ public abstract class YarnMiniCluster ex // Build mini DFS cluster Configuration hdfsConf = new Configuration(); m_dfs = new MiniDFSCluster.Builder(hdfsConf) - .numDataNodes(2) + .numDataNodes(dataNodeCount) .format(true) .racks(null) .build(); @@ -68,7 +74,7 @@ public abstract class YarnMiniCluster ex Configuration hdfs_site = new Configuration(false); for (Map.Entry<String, String> conf : m_dfs_conf) { - if (ArrayUtils.contains(m_dfs_conf.getPropertySources(conf.getKey()), "programatically")) { + if (isProgrammaticallySet(m_dfs_conf, conf)) { hdfs_site.set(conf.getKey(), m_dfs_conf.getRaw(conf.getKey())); } } @@ -78,7 +84,7 @@ public abstract class YarnMiniCluster ex m_dfs_conf.set("yarn.scheduler.capacity.root.default.capacity", "100"); 
m_dfs_conf.set("yarn.scheduler.capacity.maximum-am-resource-percent", "0.1"); // Build mini YARN cluster - m_mr = new MiniMRYarnCluster("PigMiniCluster", 2); + m_mr = new MiniMRYarnCluster("PigMiniCluster", nodeManagerCount); m_mr.init(m_dfs_conf); m_mr.start(); m_mr_conf = m_mr.getConfig(); @@ -104,8 +110,11 @@ public abstract class YarnMiniCluster ex Configuration mapred_site = new Configuration(false); Configuration yarn_site = new Configuration(false); + + setConfigOverrides(); + for (Map.Entry<String, String> conf : m_mr_conf) { - if (ArrayUtils.contains(m_mr_conf.getPropertySources(conf.getKey()), "programatically")) { + if (isProgrammaticallySet(m_mr_conf, conf)) { if (conf.getKey().contains("yarn")) { yarn_site.set(conf.getKey(), m_mr_conf.getRaw(conf.getKey())); } else if (!conf.getKey().startsWith("dfs")) { @@ -126,6 +135,15 @@ public abstract class YarnMiniCluster ex } } + protected void setConfigOverrides() { + } + + private boolean isProgrammaticallySet(Configuration configuration, Map.Entry<String, String> conf) { + // In Hadoop 3 a typo was fixed: programatically -> programmatically + return ArrayUtils.contains(configuration.getPropertySources(conf.getKey()), "programmatically") + || ArrayUtils.contains(configuration.getPropertySources(conf.getKey()), "programatically"); + } + protected void deleteConfFiles() { if(CORE_CONF_FILE.exists()) { CORE_CONF_FILE.delete();