Spark version: 1.4
import com.datastax.spark.connector._
import org.apache.spark._
import org.apache.spark.sql.cassandra.CassandraSQLContext
import org.apache.spark.SparkConf
//import com.microsoft.sqlserver.jdbc.SQLServerDriver
import java.sql.Connection
import java.sql.DriverManager
import java.io.IOException
import org.apache.spark.sql.DataFrame
def populateEvents(): Unit = {
  // Read the raw events from Cassandra for the requested time window
  var query = "SELECT brandname, appname, packname, eventname, client, timezone " +
    "FROM sams.events WHERE eventtime > '" + _from + "' AND eventtime < '" + _to + "'"
  val rdd = runCassandraQuery(query)
  rdd.registerTempTable("newdf")

  // Flatten the client struct so OSName is exposed as a plain 'platform' column
  query = "SELECT brandname, appname, packname, eventname, " +
    "client.OSName AS platform, timezone FROM newdf"
  val dfCol = runCassandraQuery(query)

  // Aggregate: count of events per distinct combination of the grouping columns
  val grprdd = dfCol.groupBy("brandname", "appname", "packname",
    "eventname", "platform", "timezone").count()
}
Do let me know if you need any more information.