You can download and compile Spark against your existing Hadoop version. The cluster overview docs are a good starting point: https://spark.apache.org/docs/latest/cluster-overview.html#cluster-manager-types
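If you build from source with the bundled sbt, you can point the assembly at your Hadoop version. The exact variable and version string depend on your setup; for Spark 0.9.x it is roughly:

    SPARK_HADOOP_VERSION=2.3.0 sbt/sbt assembly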
You can also read a bit here: http://docs.sigmoidanalytics.com/index.php/Installing_Spark_andSetting_Up_Your_Cluster (the version it covers is quite old). Attached is a piece of code (Spark Java API) to connect to HBase.

Thanks
Best Regards

On Thu, Aug 7, 2014 at 1:48 PM, Deepa Jayaveer <deepa.jayav...@tcs.com> wrote:
> Hi,
> I read your white paper about " ". We wanted to do a proof of concept on
> Spark with HBase. Documentation on setting up a Spark cluster in a
> Hadoop 2 environment is hard to come by. If you have any, can you
> please give us some reference URLs? Also, a sample program to connect
> to HBase using the Spark Java API would help.
>
> Thanks
> Deepa
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.NewHadoopRDD;

import com.google.common.collect.Lists;

import scala.Tuple2;

public class SparkHBaseMain {

    @SuppressWarnings("deprecation")
    public static void main(String[] args) {
        try {
            // Jars shipped to the executors; adjust these paths to your environment.
            List<String> jars = Lists.newArrayList(
                    "/home/akhld/Desktop/tools/spark-9/jars/spark-assembly-0.9.0-incubating-hadoop2.3.0-mr1-cdh5.0.0.jar",
                    "/home/akhld/Downloads/sparkhbasecode/hbase-server-0.96.0-hadoop2.jar",
                    "/home/akhld/Downloads/sparkhbasecode/hbase-protocol-0.96.0-hadoop2.jar",
                    "/home/akhld/Downloads/sparkhbasecode/hbase-hadoop2-compat-0.96.0-hadoop2.jar",
                    "/home/akhld/Downloads/sparkhbasecode/hbase-common-0.96.0-hadoop2.jar",
                    "/home/akhld/Downloads/sparkhbasecode/hbase-client-0.96.0-hadoop2.jar",
                    "/home/akhld/Downloads/sparkhbasecode/htrace-core-2.02.jar");

            SparkConf spconf = new SparkConf();
            spconf.setMaster("local");
            spconf.setAppName("SparkHBase");
            spconf.setSparkHome("/home/akhld/Desktop/tools/spark-9");
            spconf.setJars(jars.toArray(new String[jars.size()]));
            spconf.set("spark.executor.memory", "1g");

            final JavaSparkContext sc = new JavaSparkContext(spconf);

            // HBase configuration: load hbase-site.xml and name the table to scan.
            org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
            conf.addResource("/home/akhld/Downloads/sparkhbasecode/hbase-site.xml");
            conf.set(TableInputFormat.INPUT_TABLE, "blogposts");

            // Build an RDD over the HBase table using the new Hadoop InputFormat API.
            NewHadoopRDD<ImmutableBytesWritable, Result> rdd =
                    new NewHadoopRDD<ImmutableBytesWritable, Result>(
                            JavaSparkContext.toSparkContext(sc),
                            TableInputFormat.class,
                            ImmutableBytesWritable.class,
                            Result.class,
                            conf);

            JavaRDD<Tuple2<ImmutableBytesWritable, Result>> jrdd = rdd.toJavaRDD();

            // Extract and print the post:title column of every row, then count the rows.
            JavaRDD<Iterator<String>> retrdd = jrdd.map(new ForEachFunction());
            System.out.println("Count => " + retrdd.count());
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("Crashed: " + e);
        }
    }

    @SuppressWarnings("serial")
    private static class ForEachFunction
            extends Function<Tuple2<ImmutableBytesWritable, Result>, Iterator<String>> {
        public Iterator<String> call(Tuple2<ImmutableBytesWritable, Result> row) {
            Result result = row._2;
            // All cells for column family "post", qualifier "title" in this row.
            List<KeyValue> kvl = result.getColumn("post".getBytes(), "title".getBytes());
            List<String> titles = new ArrayList<String>();
            for (KeyValue kv : kvl) {
                String value = new String(kv.getValue());
                System.out.println("Value : " + value);
                titles.add(value);
            }
            return titles.iterator();
        }
    }
}
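On a newer Spark you can get the same RDD through the public JavaSparkContext API instead of constructing NewHadoopRDD yourself. A minimal sketch, assuming the same sc and conf as in the attached code:

    import org.apache.spark.api.java.JavaPairRDD;

    // Reads the table named in conf as a pair RDD of (row key, row Result).
    JavaPairRDD<ImmutableBytesWritable, Result> hbaseRdd =
            sc.newAPIHadoopRDD(conf, TableInputFormat.class,
                    ImmutableBytesWritable.class, Result.class);
    System.out.println("Rows => " + hbaseRdd.count());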