Your schema declares every field as string:

> val stdSchema = StructType(stdSchemaString.split(",").map(fieldName =>
>   StructField(fieldName, StringType, true)))

but it looks like the RDD contains integer columns, so the row encoder fails as soon as it meets a java.lang.Integer where the schema promises a string.
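The fix is to make the schema and the Row contents agree. Here is a minimal, untested sketch of a schema that matches the tuple types read from HBase (assuming, as the hbaseTable type parameters in your code suggest, that Rowid is the row key as a String and the five subject columns are Ints):

  import org.apache.spark.sql.types._

  // Declare each field with the type the Rows actually carry:
  // one String for the row key, five Ints for the marks.
  val stdSchema = StructType(Seq(
    StructField("Rowid", StringType, nullable = true),
    StructField("maths", IntegerType, nullable = true),
    StructField("english", IntegerType, nullable = true),
    StructField("science", IntegerType, nullable = true),
    StructField("history", IntegerType, nullable = true),
    StructField("computer", IntegerType, nullable = true)
  ))

  val stdDf = sqlContext.createDataFrame(rowRDD, stdSchema)

Alternatively, if you do want an all-string DataFrame, convert the values when building the Rows instead (note that Option.get throws on empty cells, so getOrElse with a default may be safer):

  // Cast every Option[Int] value to String so the Rows match StringType.
  val rowRDD = hBaseRDD.map(i =>
    Row(i._1.get, i._2.get.toString, i._3.get.toString,
        i._4.get.toString, i._5.get.toString, i._6.get.toString))

Either way, you can sanity-check with stdDf.printSchema() first. Also note that createDataFrame does not validate the rows eagerly; the mismatch only surfaces when an action such as show() runs, which is why the failure appears at that point.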
Chetan Khatri wrote:

> I resolved the above error by creating a SparkSession:
>
>   val spark = SparkSession.builder().appName("Hbase - Spark POC").getOrCreate()
>
> The error now shows up later, at:
>
>   spark.sql("SELECT * FROM student").show()
>
> That is, the show() action on the DataFrame throws the error below:
>
>   scala> sqlContext.sql("select * from student").show()
>   16/12/28 21:04:23 ERROR executor.Executor: Exception in task 0.0 in stage 2.0 (TID 2)
>   java.lang.RuntimeException: Error while encoding: java.lang.RuntimeException: java.lang.Integer is not a valid external type for schema of string
>   if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType), true) AS Rowid#35
>   +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType), true)
>      :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt
>      :  :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
>      :  :  +- input[0, org.apache.spark.sql.Row, true]
>      :  +- 0
>      :- null
>      +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType), true)
>         +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid), StringType)
>            +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 0, Rowid)
>               +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
>                  +- input[0, org.apache.spark.sql.Row, true]
>
>   if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType), true) AS maths#36
>   +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType), true)
>      :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt
>      :  :- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
>      :  :  +- input[0, org.apache.spark.sql.Row, true]
>      :  +- 1
>      :- null
>      +- staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType), true)
>         +- validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths), StringType)
>            +- getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 1, maths)
>               +- assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object)
>                  +- input[0, org.apache.spark.sql.Row, true]
>
>   if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, english), StringType), true) AS english#37
>   +- if (assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object).isNullAt) null else staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true], top level row object), 2, english), StringType), true)
>
> Kindly help; I cannot make out from this error what exactly is wrong.
>
> Thanks.

On Wed, Dec 28, 2016 at 9:00 PM, Chetan Khatri <chetan.opensource@...> wrote:

>> Hello Spark Community,
>>
>> I am reading an HBase table from Spark into an RDD of Rows, and I now want to convert that RDD to a DataFrame.
>>
>> Source code:
>>
>>   bin/spark-shell --packages it.nerdammer.bigdata:spark-hbase-connector_2.10:1.0.3 --conf spark.hbase.host=127.0.0.1
>>
>>   import it.nerdammer.spark.hbase._
>>   import org.apache.spark.{SparkConf, SparkContext}
>>   import org.apache.spark.sql.Row
>>   import org.apache.spark.sql.types.StructType
>>   import org.apache.spark.sql.types.StructField
>>   import org.apache.spark.sql.types.StringType
>>
>>   val sparkConf = new SparkConf().setAppName("HBase Spark POC")
>>   val sparkContext = new SparkContext(sparkConf)
>>   val sqlContext = new org.apache.spark.sql.SQLContext(sparkContext)
>>
>>   val hBaseRDD = sc.hbaseTable[(Option[String], Option[Int], Option[Int], Option[Int], Option[Int], Option[Int])]("university")
>>     .select("maths", "english", "science", "history", "computer")
>>     .inColumnFamily("school")
>>
>>   val rowRDD = hBaseRDD.map(i => Row(i._1.get, i._2.get, i._3.get, i._4.get, i._5.get, i._6.get))
>>
>>   val stdSchemaString = "Rowid,maths,english,science,history,computer"
>>
>>   val stdSchema = StructType(stdSchemaString.split(",").map(fieldName => StructField(fieldName, StringType, true)))
>>
>>   val stdDf = sqlContext.createDataFrame(rowRDD, stdSchema);
>>
>>   // Getting Error
>>
>>   stdDf.registerTempTable("student")
>>
>>   sqlContext.sql("select * from student").show()
>>
>> Error:
>>
>>   scala> val stdDf = sqlContext.createDataFrame(rowRDD, stdSchema);
>>   16/12/28 20:50:59 ERROR metastore.RetryingHMSHandler: AlreadyExistsException(message:Database default already exists)
>>     at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_database(HiveMetaStore.java:891)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>     at java.lang.reflect.Method.invoke(Method.java:498)
>>     at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
>>     at com.sun.proxy.$Proxy21.create_database(Unknown Source)
>>     at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createDatabase(HiveMetaStoreClient.java:644)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>     at java.lang.reflect.Method.invoke(Method.java:498)
>>     at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
>>     at com.sun.proxy.$Proxy22.createDatabase(Unknown Source)
>>     at org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:306)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply$mcV$sp(HiveClientImpl.scala:309)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:280)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:269)
>>     at org.apache.spark.sql.hive.client.HiveClientImpl.createDatabase(HiveClientImpl.scala:308)
>>     at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply$mcV$sp(HiveExternalCatalog.scala:99)
>>     at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99)
>>     at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99)
>>     at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:72)
>>     at org.apache.spark.sql.hive.HiveExternalCatalog.createDatabase(HiveExternalCatalog.scala:98)
>>     at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:147)
>>     at org.apache.spark.sql.catalyst.catalog.SessionCatalog.<init>(SessionCatalog.scala:89)
>>     at org.apache.spark.sql.hive.HiveSessionCatalog.<init>(HiveSessionCatalog.scala:51)
>>     at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:49)
>>     at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
>>     at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
>>     at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
>>     at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
>>     at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
>>     at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
>>     at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:542)
>>     at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:302)
>>     at org.apache.spark.sql.SQLContext.createDataFrame(SQLContext.scala:337)
>>     at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
>>     at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:47)
>>     at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:49)
>>     at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:51)
>>     at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:53)
>>     at $line34.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:55)
>>     at $line34.$read$$iw$$iw$$iw$$iw.<init>(<console>:57)
>>     at $line34.$read$$iw$$iw$$iw.<init>(<console>:59)
>>     at $line34.$read$$iw$$iw.<init>(<console>:61)
>>     at $line34.$read$$iw.<init>(<console>:63)
>>     at $line34.$read.<init>(<console>:65)
>>     at $line34.$read$.<init>(<console>:69)
>>     at $line34.$read$.<clinit>(<console>)
>>     at $line34.$eval$.$print$lzycompute(<console>:7)
>>     at $line34.$eval$.$print(<console>:6)
>>     at $line34.$eval.$print(<console>)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>     at java.lang.reflect.Method.invoke(Method.java:498)
>>     at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
>>     at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
>>     at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
>>     at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
>>     at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
>>     at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
>>     at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
>>     at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
>>     at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
>>     at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
>>     at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
>>     at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
>>     at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415)
>>     at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923)
>>     at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
>>     at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
>>     at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
>>     at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
>>     at org.apache.spark.repl.Main$.doMain(Main.scala:68)
>>     at org.apache.spark.repl.Main$.main(Main.scala:51)
>>     at org.apache.spark.repl.Main.main(Main.scala)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>>     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>>     at java.lang.reflect.Method.invoke(Method.java:498)
>>     at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
>>     at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
>>     at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
>>     at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
>>     at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
>>
>>   stdDf: org.apache.spark.sql.DataFrame = [Rowid: string, maths: string ... 4 more fields]
>>
>> What would be the resolution?
>>
>> Thanks,
>> Chetan

-----
Liang-Chi Hsieh | @viirya
Spark Technology Center
http://www.spark.tc/