Hi, I am getting the error "*java.lang.SecurityException: sealing violation: can't seal package org.apache.derby.impl.services.locks: already loaded"* after running the following code in SCALA.
I do not have any other instances of sparkContext running from my system. I will be grateful if anyone could kindly help me out. Environment: SCALA: 1.6 OS: MAC OS X ------------
import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.types._
import org.apache.spark.sql.SQLContext
// Import SuccinctRDD
import edu.berkeley.cs.succinct._

/**
 * Minimal reproduction: counts lines containing "a"/"b" in a local text file,
 * builds a Succinct RDD over the same file and runs count/search/extract on it,
 * then instantiates a HiveContext — the step that starts an embedded Derby
 * metastore and triggers the reported "sealing violation" when more than one
 * derby jar ends up on the classpath.
 */
object test1 {
  def main(args: Array[String]): Unit = {
    // jarOfClass returns Option[String]; print a readable fallback rather than
    // the raw Option.toString (the author noted "the below line returns nothing").
    println(SparkContext.jarOfClass(this.getClass).getOrElse("<no enclosing jar>"))

    val logFile = "/tmp/README.md" // Should be some file on your system
    val conf = new SparkConf().setAppName("IdeaProjects").setMaster("local[*]")
    val sc = new SparkContext(conf)
    try {
      val logData = sc.textFile(logFile, 2).cache()
      val numAs = logData.filter(line => line.contains("a")).count()
      val numBs = logData.filter(line => line.contains("b")).count()
      println("Lines with a: %s, Lines with b: %s".format(numAs, numBs))

      // Create a Spark RDD as a collection of articles; sc is the SparkContext.
      val articlesRDD = sc.textFile("/tmp/README.md").map(_.getBytes)

      // Compress the RDD into a Succinct Spark RDD and persist it in memory.
      // Note: this is a time-consuming step (usually ~8GB/hour/core) since the
      // data needs to be compressed.
      val succinctRDD = articlesRDD.succinct.persist()

      // Count the number of occurrences of "the" across all records.
      val count = succinctRDD.count("the")

      // Find all offsets at which "and" occurs, as an RDD of offsets.
      val offsetsRDD = succinctRDD.search("and")

      // Look at the first ten results.
      val offsets = offsetsRDD.take(10)

      // Extract 20 bytes before and after the first occurrence, if any.
      // headOption guards against an empty result (original code did offsets(0)
      // unconditionally, which throws if the search term is absent).
      offsets.headOption.foreach { offset =>
        val data = succinctRDD.extract(offset - 20, 40)
        println(data)
      }
      println(">>>")

      // Schema for a small DataFrame example.
      val citySchema = StructType(Seq(
        StructField("Name", StringType, false),
        StructField("Length", IntegerType, true),
        StructField("Area", DoubleType, false),
        StructField("Airport", BooleanType, true)))

      // An RDD of Rows matching the schema above.
      val cityRDD = sc.parallelize(Seq(
        Row("San Francisco", 12, 44.52, true),
        Row("Palo Alto", 12, 22.33, false),
        Row("Munich", 8, 3.14, true)))

      // NOTE(review): instantiating HiveContext spins up an embedded Derby
      // metastore. The "sealing violation: can't seal package
      // org.apache.derby..." error usually means two different derby jars are
      // on the runtime classpath (e.g. one bundled with Spark and another
      // pulled in transitively) — presumably the fix is to exclude the
      // duplicate so exactly one derby jar remains; verify the classpath.
      val hiveContext = new HiveContext(sc)
      //val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    } finally {
      // Always release the SparkContext (and with it the Derby metastore
      // lock); the original never called sc.stop(), so a crashed run could
      // leave a stale lock behind.
      sc.stop()
    }
  }
}
------------- Regards, Gourav Sengupta