[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-26 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r317534274
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *   Yarn client:
+ *$ bin/run-example --files 
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_driver_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  --master yarn
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *   Yarn cluster:
+ *$ bin/run-example --files \
+ *  
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab,${krb5_path}/krb5.conf
 \
+ *  --driver-java-options \
+ *  "-Djava.security.auth.login.config=./kafka_jaas.conf \
+ *  -Djava.security.krb5.conf=./krb5.conf" \
 
 Review comment:
   So, the cluster nodes are configured differently. That's a good explanation.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-26 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r317533705
 
 

 ##
 File path: 
examples/src/main/scala/org/apache/spark/examples/streaming/DirectKerberizedKafkaWordCount.scala
 ##
 @@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.kafka.clients.CommonClientConfigs
+import org.apache.kafka.clients.consumer.ConsumerConfig
+import org.apache.kafka.common.security.auth.SecurityProtocol
+import org.apache.kafka.common.serialization.StringDeserializer
+
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.kafka010._
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: DirectKerberizedKafkaWordCount  
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *   Yarn client:
+ *$ bin/run-example --files 
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_driver_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  --master yarn
+ *  streaming.DirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *   Yarn cluster:
+ *$ bin/run-example --files \
+ *  
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab,${krb5_path}/krb5.conf
 \
+ *  --driver-java-options \
+ *  "-Djava.security.auth.login.config=./kafka_jaas.conf \
+ *  -Djava.security.krb5.conf=./krb5.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  --master yarn --deploy-mode cluster \
+ *  streaming.DirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="./kafka.service.keytab"
+ * useKeyTab=true
+ * storeKey=true
+ * useTicketCache=false
+ * serviceName="kafka"
+ * principal="kafka/h...@example.com";
+ *   };
+ * kafka_driver_jaas.conf (used by yarn client) and kafka_jaas.conf are 
basically the same
+ * except for some differences at 'keyTab'. In kafka_driver_jaas.conf, 
'keyTab' should be
+ * "${keytab_path}/kafka.service.keytab".
+ * In addition, for IBM JVMs, please use 
'com.ibm.security.auth.module.Krb5LoginModule' 
+ * instead of 'com.sun.security.auth.module.Krb5LoginModule'.
+ *
+ * Note that this example uses SASL_PLAINTEXT for simplicity; however,
+ * SASL_PLAINTEXT has no SSL encryption and likely be less secure. Please 
consider
+ * using SASL_SSL in production.
+ */
+object DirectKerberizedKafkaWordCount {
+  def main(args: Array[String]) {
+if (args.length < 3) {
+  System.err.println(s"""
+|Usage: DirectKerberizedKafkaWordCount  
 
 
 Review comment:
   Final nit: indent.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-26 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r317532103
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
 
 Review comment:
   OK, this can be resolved.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-21 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r316100781
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *   Yarn client:
+ *$ bin/run-example --files 
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_driver_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  --master yarn
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *   Yarn cluster:
+ *$ bin/run-example --files \
+ *  
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab,${krb5_path}/krb5.conf
 \
+ *  --driver-java-options \
+ *  "-Djava.security.auth.login.config=./kafka_jaas.conf \
+ *  -Djava.security.krb5.conf=./krb5.conf" \
 
 Review comment:
   2 questions:
   * Why is `krb5.conf` needed for cluster mode?
   * Why is `krb5.conf` not needed for client mode?
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-21 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r316096703
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
 
 Review comment:
   This would be good to mention as a comment since users may not read this 
discussion.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-21 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r316100243
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *   Yarn client:
+ *$ bin/run-example --files 
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_driver_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  --master yarn
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *   Yarn cluster:
+ *$ bin/run-example --files \
+ *  
${jaas_path}/kafka_jaas.conf,${keytab_path}/kafka.service.keytab,${krb5_path}/krb5.conf
 \
+ *  --driver-java-options \
+ *  "-Djava.security.auth.login.config=./kafka_jaas.conf \
+ *  -Djava.security.krb5.conf=./krb5.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  --master yarn --deploy-mode cluster \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="./kafka.service.keytab"
+ * useKeyTab=true
+ * storeKey=true
+ * useTicketCache=false
+ * serviceName="kafka"
+ * principal="kafka/h...@example.com";
+ *   };
+ * kafka_driver_jaas.conf (used by yarn client) and kafka_jaas.conf are 
basically the same
 
 Review comment:
   If `--files` used for keytab and jaas file then both driver and executor can 
pick up the same jaas (in keytab file `./kafka.service.keytab` has to be set). 
Please see 
https://github.com/gaborgsomogyi/spark-structured-secure-kafka-app#spark-submit 
Is that not working somehow?
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-16 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r314693286
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="${path_of_keytab}/kafka.service.keytab"
 
 Review comment:
   Your last example is almost good but it will work only in client mode. In 
cluster mode the driver runs on a random machine where maybe the keytab not 
exists. I would suggest this:
   ```
   bin/run-example --files 
${jaas_path}/jaas.conf,${keytab_path}/kafka.service.keytab \
   --driver-java-options "-Djava.security.auth.login.config=./jaas.conf" \
   --conf 
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./jaas.conf" 
\
   streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
   consumer-group topic1,topic2
   ```
   and in the jaas file:
   ```
   ...
 keyTab="./kafka.service.keytab"
   ...
   ```
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-16 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r314695471
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="${path_of_keytab}/kafka.service.keytab"
+ * useKeyTab=true
+ * storeKey=true
+ * useTicketCache=false
+ * serviceName="kafka"
+ * principal="kafka/server@example";
 
 Review comment:
   The default realm is `EXAMPLE.COM` in Kerby and in `krb5` the configuration 
guide suggests the same. With `example` realm the example app failed with 
authentication error.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-16 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r314693286
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="${path_of_keytab}/kafka.service.keytab"
 
 Review comment:
   Your last example is almost good but it will work only in client mode. In 
cluster mode the driver runs on a random machine where maybe the keytab not 
exists. I would suggest this:
   ```
   bin/run-example --files 
${jaas_path}/jaas.conf,${kyetab_path}/kafka.service.keytab \
   --driver-java-options "-Djava.security.auth.login.config=./jaas.conf" \
   --conf 
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./jaas.conf" 
\
   streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
   consumer-group topic1,topic2
   ```
   and in the jaas file:
   ```
   ...
 keyTab="./kafka.service.keytab"
   ...
   ```
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-15 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r314269808
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="${path_of_keytab}/kafka.service.keytab"
 
 Review comment:
   I'm fine to add keytab file to the `--files` section but then the keytab 
path in the jaas file has to be modified to `./kafka.service.keytab` since 
`--files` doesn't preserve the path.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-15 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r314268725
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="${path_of_keytab}/kafka.service.keytab"
+ * useKeyTab=true
+ * storeKey=true
+ * useTicketCache=false
+ * serviceName="kafka"
+ * principal="kafka/server@example";
 
 Review comment:
   The principal doesn't contain the `org.domain` parameter which makes this 
example constantly fail in my setup, the rest looks good. I'm using hadoop's 
MiniKDC with default settings.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-14 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313789721
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="${path_of_keytab}/kafka.service.keytab"
+ * useKeyTab=true
+ * storeKey=true
+ * useTicketCache=false
+ * serviceName="kafka"
+ * principal="kafka/server@example";
 
 Review comment:
   Did this work? With what kind of KDC setting have you tested this?
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-14 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313787355
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
 
 Review comment:
   This works on sun JVMs, on IBM 
`com.ibm.security.auth.module.Krb5LoginModule` is needed.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-14 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313787044
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ *
+ * kafka_jaas.conf can manually create, template as:
+ *   KafkaClient {
+ * com.sun.security.auth.module.Krb5LoginModule required
+ * keyTab="${path_of_keytab}/kafka.service.keytab"
 
 Review comment:
   Since the keytab file is not added to the `--files` section it must exist on 
every server. Worth to mention as prerequisite since it's not obvious for 
everybody.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-14 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313785263
 
 

 ##
 File path: 
examples/src/main/scala/org/apache/spark/examples/streaming/DirectKerberizedKafkaWordCount.scala
 ##
 @@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.streaming
+
+import org.apache.kafka.clients.CommonClientConfigs
+import org.apache.kafka.clients.consumer.ConsumerConfig
+import org.apache.kafka.common.security.auth.SecurityProtocol
+import org.apache.kafka.common.serialization.StringDeserializer
+
+import org.apache.spark.SparkConf
+import org.apache.spark.streaming._
+import org.apache.spark.streaming.kafka010._
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: DirectKerberizedKafkaWordCount  
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
 
 Review comment:
   The most used scenario is the keytab one. There are other ways like cache, 
etc... but such things can be found out based on this example + the jaas 
specification.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-14 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313765130
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ */
+
+public final class JavaDirectKerberizedKafkaWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+if (args.length < 3) {
+  System.err.println(
+  "Usage: JavaDirectKerberizedKafkaWordCount   
\n" +
+  "   is a list of one or more Kafka brokers\n" +
+  "   is a consumer group name to consume from 
topics\n" +
+  "   is a list of one or more kafka topics to 
consume from\n\n");
+  System.exit(1);
+}
+
+StreamingExamples.setStreamingLogLevels();
+
+String brokers = args[0];
+String groupId = args[1];
+String topics = args[2];
+
+// Create context with a 2 seconds batch interval
+SparkConf sparkConf = new 
SparkConf().setAppName("JavaDirectKerberizedKafkaWordCount");
+JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, 
Durations.seconds(2));
+
+Set topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
+Map kafkaParams = new HashMap<>();
+kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
+kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
+  
SecurityProtocol.SASL_PLAINTEXT.name);
 
 Review comment:
   > SSL encryption and kerberos both cause loss of performance.
   
   As any kind of security solution in the world.
   


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: 

[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-13 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313331553
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ */
+
+public final class JavaDirectKerberizedKafkaWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+if (args.length < 3) {
+  System.err.println(
+  "Usage: JavaDirectKerberizedKafkaWordCount   
\n" +
+  "   is a list of one or more Kafka brokers\n" +
+  "   is a consumer group name to consume from 
topics\n" +
+  "   is a list of one or more kafka topics to 
consume from\n\n");
+  System.exit(1);
+}
+
+StreamingExamples.setStreamingLogLevels();
+
+String brokers = args[0];
+String groupId = args[1];
+String topics = args[2];
+
+// Create context with a 2 seconds batch interval
+SparkConf sparkConf = new 
SparkConf().setAppName("JavaDirectKerberizedKafkaWordCount");
+JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, 
Durations.seconds(2));
+
+Set topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
+Map kafkaParams = new HashMap<>();
+kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
+kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
+  
SecurityProtocol.SASL_PLAINTEXT.name);
 
 Review comment:
   I'm not questioning whether `SASL_PLAINTEXT` works or not, it is. I'm 
telling that using kerberos on plain text channel is coming from evil from 
security perspective. Somehow we should tell the users it's not the advised way 
because credentials can be sniffed.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-13 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313272849
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ */
+
+public final class JavaDirectKerberizedKafkaWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+if (args.length < 3) {
+  System.err.println(
+  "Usage: JavaDirectKerberizedKafkaWordCount   
\n" +
+  "   is a list of one or more Kafka brokers\n" +
+  "   is a consumer group name to consume from 
topics\n" +
+  "   is a list of one or more kafka topics to 
consume from\n\n");
+  System.exit(1);
+}
+
+StreamingExamples.setStreamingLogLevels();
+
+String brokers = args[0];
+String groupId = args[1];
+String topics = args[2];
+
+// Create context with a 2 seconds batch interval
+SparkConf sparkConf = new 
SparkConf().setAppName("JavaDirectKerberizedKafkaWordCount");
+JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, 
Durations.seconds(2));
+
+Set topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
+Map kafkaParams = new HashMap<>();
+kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
+kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
+  
SecurityProtocol.SASL_PLAINTEXT.name);
 
 Review comment:
   `SASL_PLAINTEXT` is only for testing.
   Either I would use `SASL_SSL` or log a warning like `SASL_PLAINTEXT is only 
for testing...`.


This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: 

[GitHub] [spark] gaborgsomogyi commented on a change in pull request #25412: [SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples

2019-08-13 Thread GitBox
gaborgsomogyi commented on a change in pull request #25412: 
[SPARK-28691][EXAMPLES] Add Java/Scala DirectKerberizedKafkaWordCount examples
URL: https://github.com/apache/spark/pull/25412#discussion_r313271843
 
 

 ##
 File path: 
examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKerberizedKafkaWordCount.java
 ##
 @@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.streaming;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Arrays;
+import java.util.Map;
+import java.util.Set;
+import java.util.regex.Pattern;
+
+import scala.Tuple2;
+
+import org.apache.kafka.clients.CommonClientConfigs;
+import org.apache.kafka.common.security.auth.SecurityProtocol;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.common.serialization.StringDeserializer;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.streaming.api.java.*;
+import org.apache.spark.streaming.kafka010.ConsumerStrategies;
+import org.apache.spark.streaming.kafka010.KafkaUtils;
+import org.apache.spark.streaming.kafka010.LocationStrategies;
+import org.apache.spark.streaming.Durations;
+
+/**
+ * Consumes messages from one or more topics in Kafka and does wordcount.
+ * Usage: JavaDirectKerberizedKafkaWordCount   
+ *is a list of one or more Kafka brokers
+ *is a consumer group name to consume from topics
+ *is a list of one or more kafka topics to consume from
+ *
+ * Example:
+ *$ bin/run-example --files ${path}/kafka_jaas.conf \
+ *  --driver-java-options 
"-Djava.security.auth.login.config=${path}/kafka_jaas.conf" \
+ *  --conf \
+ *  
"spark.executor.extraJavaOptions=-Djava.security.auth.login.config=./kafka_jaas.conf"
 \
+ *  streaming.JavaDirectKerberizedKafkaWordCount 
broker1-host:port,broker2-host:port \
+ *  consumer-group topic1,topic2
+ */
+
+public final class JavaDirectKerberizedKafkaWordCount {
+  private static final Pattern SPACE = Pattern.compile(" ");
+
+  public static void main(String[] args) throws Exception {
+if (args.length < 3) {
+  System.err.println(
+  "Usage: JavaDirectKerberizedKafkaWordCount   
\n" +
+  "   is a list of one or more Kafka brokers\n" +
+  "   is a consumer group name to consume from 
topics\n" +
+  "   is a list of one or more kafka topics to 
consume from\n\n");
+  System.exit(1);
+}
+
+StreamingExamples.setStreamingLogLevels();
+
+String brokers = args[0];
+String groupId = args[1];
+String topics = args[2];
+
+// Create context with a 2 seconds batch interval
+SparkConf sparkConf = new 
SparkConf().setAppName("JavaDirectKerberizedKafkaWordCount");
+JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, 
Durations.seconds(2));
+
+Set topicsSet = new HashSet<>(Arrays.asList(topics.split(",")));
+Map kafkaParams = new HashMap<>();
+kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
+kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
+kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, 
StringDeserializer.class);
+kafkaParams.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG,
+  
SecurityProtocol.SASL_PLAINTEXT.name);
+
+// Create direct kafka stream with brokers and topics
+JavaInputDStream> messages = 
KafkaUtils.createDirectStream(
+jssc,
+LocationStrategies.PreferConsistent(),
+ConsumerStrategies.Subscribe(topicsSet, kafkaParams));
+
+// Get the lines, split them into words, count the words and print
+JavaDStream lines = messages.map(ConsumerRecord::value);
+JavaDStream words = lines.flatMap(x -> 
Arrays.asList(SPACE.split(x)).iterator());
+JavaPairDStream wordCounts = words.mapToPair(s -> new 
Tuple2<>(s, 1))
+.reduceByKey((i1, i2) -> i1 + i2);
 
 Review comment:
   Indentations like this are blown up in the