Github user brkyvz commented on a diff in the pull request:
https://github.com/apache/spark/pull/16744#discussion_r102339856
--- Diff: external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala ---
@@ -123,9 +123,143 @@ object KinesisUtils {
     // scalastyle:on
     val cleanedHandler = ssc.sc.clean(messageHandler)
     ssc.withNamedScope("kinesis stream") {
+      val kinesisCredsProvider = BasicCredentialsProvider(
+        awsAccessKeyId = awsAccessKeyId,
+        awsSecretKey = awsSecretKey)
       new KinesisInputDStream[T](ssc, streamName, endpointUrl, validateRegion(regionName),
         initialPositionInStream, kinesisAppName, checkpointInterval, storageLevel,
-        cleanedHandler, Some(SerializableAWSCredentials(awsAccessKeyId, awsSecretKey)))
+        cleanedHandler, kinesisCredsProvider)
+    }
+  }
+
+  /**
+   * Create an input stream that pulls messages from a Kinesis stream.
+   * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
+   *
+   * @param ssc StreamingContext object
+   * @param kinesisAppName Kinesis application name used by the Kinesis Client Library
+   *                       (KCL) to update DynamoDB
+   * @param streamName Kinesis stream name
+   * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com)
+   * @param regionName Name of region used by the Kinesis Client Library (KCL) to update
+   *                   DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics)
+   * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the
+   *                                worker's initial starting position in the stream.
+   *                                The values are either the beginning of the stream
+   *                                per Kinesis' limit of 24 hours
+   *                                (InitialPositionInStream.TRIM_HORIZON) or
+   *                                the tip of the stream (InitialPositionInStream.LATEST).
+   * @param checkpointInterval Checkpoint interval for Kinesis checkpointing.
+   *                           See the Kinesis Spark Streaming documentation for more
+   *                           details on the different types of checkpoints.
+   * @param storageLevel Storage level to use for storing the received objects.
+   *                     StorageLevel.MEMORY_AND_DISK_2 is recommended.
+   * @param messageHandler A custom message handler that can generate a generic output from a
+   *                       Kinesis `Record`, which contains both message data and metadata.
+   * @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from
+   *                         the Kinesis stream.
+   * @param stsSessionName Name to uniquely identify STS sessions if multiple principals assume
+   *                       the same role.
+   * @param stsExternalId External ID that can be used to validate against the assumed IAM role's
+   *                      trust policy.
+   *
+   * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain
+   *       on the workers. See AWS documentation to understand how
+   *       DefaultAWSCredentialsProviderChain gets the AWS credentials.
+   */
+  // scalastyle:off
+  def createStream[T: ClassTag](
+      ssc: StreamingContext,
+      kinesisAppName: String,
+      streamName: String,
+      endpointUrl: String,
+      regionName: String,
+      initialPositionInStream: InitialPositionInStream,
+      checkpointInterval: Duration,
+      storageLevel: StorageLevel,
+      messageHandler: Record => T,
+      stsAssumeRoleArn: String,
+      stsSessionName: String,
+      stsExternalId: String): ReceiverInputDStream[T] = {
+    // scalastyle:on
+    val cleanedHandler = ssc.sc.clean(messageHandler)
+    // Setting scope to override receiver stream's scope of "receiver stream"
+    ssc.withNamedScope("kinesis stream") {
+      val kinesisCredsProvider = STSCredentialsProvider(
+        stsRoleArn = stsAssumeRoleArn,
+        stsSessionName = stsSessionName,
+        stsExternalId = Option(stsExternalId))
+      new KinesisInputDStream[T](ssc, streamName, endpointUrl, validateRegion(regionName),
+        initialPositionInStream, kinesisAppName, checkpointInterval, storageLevel,
+        cleanedHandler, kinesisCredsProvider)
+    }
+  }
+
+  /**
+   * Create an input stream that pulls messages from a Kinesis stream.
+   * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis.
+   *
+   * @param ssc StreamingContext object
+   * @param kinesisAppName Kinesis application name used by the Kinesis Client Library
+   *                       (KCL) to update DynamoDB
+   * @param streamName Kinesis stream name
+   * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com)
+   * @param regionName Name of region used by the Kinesis Client Library (KCL) to update
+   *                   DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics)
+   * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the
+   *                                worker's initial starting position in the stream.
+   *                                The values are either the beginning of the stream
+   *                                per Kinesis' limit of 24 hours
+   *                                (InitialPositionInStream.TRIM_HORIZON) or
+   *                                the tip of the stream (InitialPositionInStream.LATEST).
+   * @param checkpointInterval Checkpoint interval for Kinesis checkpointing.
+   *                           See the Kinesis Spark Streaming documentation for more
+   *                           details on the different types of checkpoints.
+   * @param storageLevel Storage level to use for storing the received objects.
+   *                     StorageLevel.MEMORY_AND_DISK_2 is recommended.
+   * @param messageHandler A custom message handler that can generate a generic output from a
+   *                       Kinesis `Record`, which contains both message data and metadata.
+   * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain)
+   * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain)
+   * @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from
+   *                         the Kinesis stream.
+   * @param stsSessionName Name to uniquely identify STS sessions if multiple principals assume
+   *                       the same role.
+   * @param stsExternalId External ID that can be used to validate against the assumed IAM role's
+   *                      trust policy.
+   *
+   * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing
+   *       is enabled. Make sure that your checkpoint directory is secure.
+   */
+ // scalastyle:off
+ def createStream[T: ClassTag](
--- End diff --
let's keep this
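
For anyone following along, a rough sketch of how the STS-based overload added in this diff could be called; the app/stream names, region, endpoint, role ARN, session name, and external ID below are placeholders, not values from this PR:

```scala
// Illustrative only: placeholder names and ARNs, not part of this change.
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kinesis.KinesisUtils

val ssc = new StreamingContext(new SparkConf().setAppName("KinesisSTSExample"), Seconds(10))

// Message handler that just extracts the raw payload bytes from each Record.
val byteHandler = (record: Record) => record.getData.array()

val stream = KinesisUtils.createStream(
  ssc, "myKinesisApp", "myStream", "https://kinesis.us-east-1.amazonaws.com", "us-east-1",
  InitialPositionInStream.LATEST, Seconds(10), StorageLevel.MEMORY_AND_DISK_2,
  byteHandler,
  stsAssumeRoleArn = "arn:aws:iam::123456789012:role/SparkKinesisRole",
  stsSessionName = "spark-kinesis-session",
  stsExternalId = "my-external-id")
```

The handler here only pulls out the payload; any `Record => T` function works, and the STS arguments end up in the `STSCredentialsProvider` shown in the diff.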