Github user tdas commented on a diff in the pull request:
https://github.com/apache/spark/pull/17043#discussion_r103039931
--- Diff:
external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala
---
@@ -152,6 +157,56 @@ private[kafka010] class KafkaSourceProvider extends
DataSourceRegister with Stre
endingRelationOffsets)
}
+ override def createSink(
+ sqlContext: SQLContext,
+ parameters: Map[String, String],
+ partitionColumns: Seq[String],
+ outputMode: OutputMode): Sink = {
+ val caseInsensitiveParams = parameters.map { case (k, v) =>
(k.toLowerCase, v) }
+ val defaultTopic =
caseInsensitiveParams.get(TOPIC_OPTION_KEY).map(_.trim.toLowerCase)
+ val specifiedKafkaParams =
+ parameters
+ .keySet
+ .filter(_.toLowerCase.startsWith("kafka."))
+ .map { k => k.drop(6).toString -> parameters(k) }
+ .toMap + ("value.serializer" -> classOf[BytesSerializer].getName,
+ "key.serializer" -> classOf[BytesSerializer].getName)
+ new KafkaSink(sqlContext,
+ new ju.HashMap[String, Object](specifiedKafkaParams.asJava),
+ defaultTopic)
+ }
+
+ override def createRelation(
+ outerSQLContext: SQLContext,
+ mode: SaveMode,
+ parameters: Map[String, String],
+ data: DataFrame): BaseRelation = {
+ mode match {
+ case SaveMode.Overwrite | SaveMode.Ignore =>
+ throw new AnalysisException(s"save mode $mode not allowed for
Kafka. " +
+ s"Allowable save modes are ${SaveMode.Append} and " +
+ s"${SaveMode.ErrorIfExists} (default).")
+ case _ => // good
+ }
+ val caseInsensitiveParams = parameters.map { case (k, v) =>
(k.toLowerCase, v) }
+ val defaultTopic =
caseInsensitiveParams.get(TOPIC_OPTION_KEY).map(_.trim.toLowerCase)
+ val specifiedKafkaParams =
+ parameters
+ .keySet
+ .filter(_.toLowerCase.startsWith("kafka."))
+ .map { k => k.drop(6).toString -> parameters(k) }
+ .toMap + ("value.serializer" -> classOf[BytesSerializer].getName,
+ "key.serializer" -> classOf[BytesSerializer].getName)
+ KafkaWriter.write(outerSQLContext.sparkSession, data.queryExecution,
+ new ju.HashMap[String, Object](specifiedKafkaParams.asJava),
+ defaultTopic)
+
+ new BaseRelation {
--- End diff --
Do you know which relation should be returned here? I don't see
good docs on this, but judging from the example of JdbcRelationProvider, you
have to return a relation that can read the data you have written. That is not
the case here. Can you explain why, and also document the reason?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]