This is an automated email from the ASF dual-hosted git repository.
bobbai00 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/main by this push:
new 5d49015a73 chore(dependencies): drop `twittered` (#4463)
5d49015a73 is described below
commit 5d49015a738f4967cb71e98684c9c4ff7bd7f1cd
Author: Jiadong Bai <[email protected]>
AuthorDate: Wed Apr 22 15:40:16 2026 -0700
chore(dependencies): drop `twittered` (#4463)
### What changes were proposed in this PR?
Drop the `twittered` dependency, which transitively pulls in the
LGPL-3.0-licensed `sonar-packaging-maven-plugin` and
`sonar-update-center-common`. LGPL-3.0 is an ASF
[Category X](https://www.apache.org/legal/resolved.html#category-x)
license, so these artifacts may not be included in an Apache release.
The Twitter `OpDesc` classes are kept and marked `@deprecated` for
backward compatibility, so old workflow JSON still deserializes. The
`OpExec` classes are kept as `@deprecated` stubs: the base class throws
`UnsupportedOperationException` from `open()`, and each subclass's
`produceTuple()` returns `Iterator.empty`. `TwitterUtils` is removed.
### Any related issues, documentation, discussions?
Closes #4454.
Closes #4455.
Closes #4468.
### How was this PR tested?
`sbt WorkflowOperator/compile`, `sbt WorkflowExecutionService/compile`,
and matching `Test/compile` targets pass.
### Was this PR authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Claude Opus 4.7)
Co-authored-by: Claude Opus 4.7 (1M context) <[email protected]>
Co-authored-by: Chen Li <[email protected]>
---
amber/build.sbt | 3 -
common/workflow-operator/build.sbt | 3 +-
.../source/apis/twitter/TwitterSourceOpDesc.scala | 1 +
.../source/apis/twitter/TwitterSourceOpExec.scala | 33 +-----
.../v2/TwitterFullArchiveSearchSourceOpDesc.scala | 1 +
.../v2/TwitterFullArchiveSearchSourceOpExec.scala | 128 +--------------------
.../twitter/v2/TwitterSearchSourceOpDesc.scala | 1 +
.../twitter/v2/TwitterSearchSourceOpExec.scala | 120 +------------------
.../source/apis/twitter/v2/TwitterUtils.scala | 105 -----------------
9 files changed, 14 insertions(+), 381 deletions(-)
diff --git a/amber/build.sbt b/amber/build.sbt
index 31d7ddffae..ff7c722ca9 100644
--- a/amber/build.sbt
+++ b/amber/build.sbt
@@ -210,9 +210,6 @@ libraryDependencies += "com.konghq" % "unirest-java" %
"3.14.2"
// https://mvnrepository.com/artifact/com.github.marianobarrios/lbmq
libraryDependencies += "com.github.marianobarrios" % "lbmq" % "0.6.0"
-// https://mvnrepository.com/artifact/io.github.redouane59.twitter/twittered
-libraryDependencies += "io.github.redouane59.twitter" % "twittered" % "2.21"
-
// https://mvnrepository.com/artifact/org.jooq/jooq
libraryDependencies += "org.jooq" % "jooq" % "3.14.16"
diff --git a/common/workflow-operator/build.sbt
b/common/workflow-operator/build.sbt
index d1383c1f94..1c082cae96 100644
--- a/common/workflow-operator/build.sbt
+++ b/common/workflow-operator/build.sbt
@@ -110,8 +110,7 @@ libraryDependencies ++= Seq(
"org.apache.commons" % "commons-compress" % "1.27.1",
"org.tukaani" % "xz" % "1.9",
"com.univocity" % "univocity-parsers" % "2.9.1",
- "org.apache.lucene" % "lucene-analyzers-common" % "8.11.4",
- "io.github.redouane59.twitter" % "twittered" % "2.21"
+ "org.apache.lucene" % "lucene-analyzers-common" % "8.11.4"
)
libraryDependencies += "io.github.classgraph" % "classgraph" % "4.8.184" % Test
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpDesc.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpDesc.scala
index d65bb12451..223b9bbfff 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpDesc.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpDesc.scala
@@ -25,6 +25,7 @@ import org.apache.texera.amber.core.workflow.OutputPort
import org.apache.texera.amber.operator.metadata.{OperatorGroupConstants,
OperatorInfo}
import org.apache.texera.amber.operator.source.SourceOperatorDescriptor
+@deprecated("Twitter source operator is no longer executable.",
"1.1.0-incubating")
abstract class TwitterSourceOpDesc extends SourceOperatorDescriptor {
@JsonIgnore
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpExec.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpExec.scala
index aff988b235..9577c1f711 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpExec.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/TwitterSourceOpExec.scala
@@ -19,41 +19,16 @@
package org.apache.texera.amber.operator.source.apis.twitter
-import io.github.redouane59.twitter.TwitterClient
-import io.github.redouane59.twitter.signature.TwitterCredentials
import org.apache.texera.amber.core.executor.SourceOperatorExecutor
-import org.apache.texera.amber.util.JSONUtils.objectMapper
+@deprecated("Twitter source operator is no longer executable.",
"1.1.0-incubating")
abstract class TwitterSourceOpExec(
descString: String
) extends SourceOperatorExecutor {
- private val desc: TwitterSourceOpDesc =
- objectMapper.readValue(descString, classOf[TwitterSourceOpDesc])
- // batch size for each API request defined by Twitter
- // 500 is the maximum tweets for each request
- val TWITTER_API_BATCH_SIZE_MAX = 500
-
- // 10 is the minimal tweets for each request
- // val TWITTER_API_BATCH_SIZE_MIN = 10
-
- // however, when using batch size < 100, could cause using different
- // twitter endpoints which has different rate limit.
- // (related to redouane59/twitteredV2.5)
- // thus, in practice, we use 100 as the min batch size.
- val TWITTER_API_BATCH_SIZE_MIN = 100
-
- var twitterClient: TwitterClient = _
-
- override def open(): Unit = {
- twitterClient = new TwitterClient(
- TwitterCredentials
- .builder()
- .apiKey(desc.apiKey)
- .apiSecretKey(desc.apiSecretKey)
- .build()
+ override def open(): Unit =
+ throw new UnsupportedOperationException(
+ "Twitter source operator is no longer executable in Apache Texera."
)
- twitterClient.setAutomaticRetry(!desc.stopWhenRateLimited)
- }
override def close(): Unit = {}
}
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpDesc.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpDesc.scala
index 6c3c74d3cd..2e5722635d 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpDesc.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpDesc.scala
@@ -33,6 +33,7 @@ import
org.apache.texera.amber.operator.metadata.annotations.UIWidget
import org.apache.texera.amber.operator.source.apis.twitter.TwitterSourceOpDesc
import org.apache.texera.amber.util.JSONUtils.objectMapper
+@deprecated("Twitter source operator is no longer executable.",
"1.1.0-incubating")
class TwitterFullArchiveSearchSourceOpDesc extends TwitterSourceOpDesc {
@JsonIgnore
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpExec.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpExec.scala
index 46053543b1..64fa77e6cc 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpExec.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterFullArchiveSearchSourceOpExec.scala
@@ -19,134 +19,12 @@
package org.apache.texera.amber.operator.source.apis.twitter.v2
-import io.github.redouane59.twitter.dto.endpoints.AdditionalParameters
-import io.github.redouane59.twitter.dto.tweet.TweetList
-import io.github.redouane59.twitter.dto.tweet.TweetV2.TweetData
-import io.github.redouane59.twitter.dto.user.UserV2.UserData
-import org.apache.texera.amber.core.tuple.{Schema, Tuple, TupleLike}
+import org.apache.texera.amber.core.tuple.TupleLike
import org.apache.texera.amber.operator.source.apis.twitter.TwitterSourceOpExec
-import
org.apache.texera.amber.operator.source.apis.twitter.v2.TwitterUtils.tweetDataToTuple
-import org.apache.texera.amber.util.JSONUtils.objectMapper
-
-import java.time.LocalDateTime
-import java.time.format.DateTimeFormatter
-import scala.collection.mutable.ListBuffer
-import scala.collection.{Iterator, mutable}
-import scala.jdk.CollectionConverters.ListHasAsScala
+@deprecated("Twitter source operator is no longer executable.",
"1.1.0-incubating")
class TwitterFullArchiveSearchSourceOpExec(
descString: String
) extends TwitterSourceOpExec(descString) {
- private val desc: TwitterFullArchiveSearchSourceOpDesc =
- objectMapper.readValue(descString,
classOf[TwitterFullArchiveSearchSourceOpDesc])
- var curLimit: Int = desc.limit
- // nextToken is used to retrieve next page of results, if exists.
- var nextToken: String = _
- // contains tweets from the previous request.
- var tweetCache: mutable.Buffer[TweetData] = mutable.Buffer()
- var userCache: Map[String, UserData] = Map()
- var hasNextRequest: Boolean = curLimit > 0
- var lastQueryTime: Long = 0
- val schema: Schema = desc.sourceSchema()
-
- override def produceTuple(): Iterator[TupleLike] =
- new Iterator[TupleLike]() {
- override def hasNext: Boolean = (hasNextRequest || tweetCache.nonEmpty)
&& curLimit > 0
-
- override def next(): Tuple = {
- // if the current cache is exhausted, query for the next response
- if (tweetCache.isEmpty && hasNextRequest) {
- queryForNextBatch(
- desc.searchQuery,
- LocalDateTime.parse(desc.fromDateTime,
DateTimeFormatter.ISO_DATE_TIME),
- LocalDateTime.parse(desc.toDateTime,
DateTimeFormatter.ISO_DATE_TIME),
- curLimit.min(TWITTER_API_BATCH_SIZE_MAX)
- )
- }
-
- // if the request is emtpy, it indicates no more tweets, iterator
should stop
- if (tweetCache.isEmpty) {
- return null
- }
- val tweet: TweetData = tweetCache.remove(0)
-
- curLimit -= 1
-
- // if limit is 0, then no more requests should be sent
- if (curLimit == 0) {
- hasNextRequest = false
- }
-
- val user = userCache.get(tweet.getAuthorId)
-
- tweetDataToTuple(tweet, user, schema)
- }
- }
-
- private def queryForNextBatch(
- query: String,
- startDateTime: LocalDateTime,
- endDateTime: LocalDateTime,
- maxResults: Int
- ): Unit = {
- def enforceRateLimit(): Unit = {
- // Twitter limit 1 request per second and 300 calls in 15 minutes for V2
FullArchiveSearch
- // If request too frequently, twitter will force the client wait for 5
minutes.
- // Here we send at most 1 request per second to avoid hitting rate limit.
- val currentTime = System.currentTimeMillis()
-
- // using 1100 to avoid some edge cases
- if (currentTime - lastQueryTime < 1100) {
- Thread.sleep(currentTime - lastQueryTime)
- }
- lastQueryTime = System.currentTimeMillis()
- }
-
- val params = AdditionalParameters
- .builder()
- .startTime(startDateTime)
- .endTime(endDateTime)
- .maxResults(maxResults.max(TWITTER_API_BATCH_SIZE_MIN))
- .recursiveCall(false)
- .nextToken(nextToken)
- .build()
-
- // There is bug in the library twittered that it returns null although
there exists
- // more pages.
- // Below is a temporary patch to make sure the query stops when there are
actually
- // no more pages. The strategy is to repeat the last request multiple
times to ensure
- // it returns the nextToken as null. The solution is not ideal but should
do job in
- // the most cases.
- // TODO: replace with newer version library twittered when the bug is
fixed.
- var response: TweetList = null
- var retry = 2
- do {
- enforceRateLimit()
- response = twitterClient.searchAllTweets(query, params)
- retry -= 1
-
- if (response == null || response.getMeta == null) {
- // Error in request, result in null responses
- throw new RuntimeException("error in requesting Twitter API, please
check your query.")
-
- }
- } while (response.getMeta.getNextToken == null && retry > 0)
-
- nextToken = response.getMeta.getNextToken
-
- tweetCache =
- if (response != null && response.getData != null)
response.getData.asScala else ListBuffer()
-
- userCache =
- if (response != null && response.getIncludes != null &&
response.getIncludes.getUsers != null)
- response.getIncludes.getUsers.asScala
- .map((userData: UserData) => userData.getId -> userData)
- .toMap
- else Map()
-
- // when there is no more pages left, no need to request any more
- hasNextRequest = nextToken != null
-
- }
-
+ override def produceTuple(): Iterator[TupleLike] = Iterator.empty
}
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpDesc.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpDesc.scala
index 8b6976f4f2..c2ae5170d2 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpDesc.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpDesc.scala
@@ -33,6 +33,7 @@ import
org.apache.texera.amber.operator.metadata.annotations.UIWidget
import org.apache.texera.amber.operator.source.apis.twitter.TwitterSourceOpDesc
import org.apache.texera.amber.util.JSONUtils.objectMapper
+@deprecated("Twitter source operator is no longer executable.",
"1.1.0-incubating")
class TwitterSearchSourceOpDesc extends TwitterSourceOpDesc {
@JsonIgnore
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpExec.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpExec.scala
index 9504664687..e31c110038 100644
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpExec.scala
+++
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterSearchSourceOpExec.scala
@@ -19,126 +19,12 @@
package org.apache.texera.amber.operator.source.apis.twitter.v2
-import io.github.redouane59.twitter.dto.endpoints.AdditionalParameters
-import io.github.redouane59.twitter.dto.tweet.TweetList
-import io.github.redouane59.twitter.dto.tweet.TweetV2.TweetData
-import io.github.redouane59.twitter.dto.user.UserV2.UserData
-import org.apache.texera.amber.core.tuple.{Schema, Tuple, TupleLike}
+import org.apache.texera.amber.core.tuple.TupleLike
import org.apache.texera.amber.operator.source.apis.twitter.TwitterSourceOpExec
-import
org.apache.texera.amber.operator.source.apis.twitter.v2.TwitterUtils.tweetDataToTuple
-import org.apache.texera.amber.util.JSONUtils.objectMapper
-
-import scala.collection.mutable.ListBuffer
-import scala.collection.{Iterator, mutable}
-import scala.jdk.CollectionConverters.ListHasAsScala
+@deprecated("Twitter source operator is no longer executable.",
"1.1.0-incubating")
class TwitterSearchSourceOpExec(
descString: String
) extends TwitterSourceOpExec(descString) {
- private val desc: TwitterSearchSourceOpDesc =
- objectMapper.readValue(descString, classOf[TwitterSearchSourceOpDesc])
- var curLimit: Int = desc.limit
- // nextToken is used to retrieve next page of results, if exists.
- var nextToken: String = _
- // contains tweets from the previous request.
- var tweetCache: mutable.Buffer[TweetData] = mutable.Buffer()
- var userCache: Map[String, UserData] = Map()
- var hasNextRequest: Boolean = curLimit > 0
- var lastQueryTime: Long = 0
- val schema: Schema = desc.sourceSchema()
-
- override def produceTuple(): Iterator[TupleLike] =
- new Iterator[TupleLike]() {
- override def hasNext: Boolean = (hasNextRequest || tweetCache.nonEmpty)
&& curLimit > 0
-
- override def next(): Tuple = {
- // if the current cache is exhausted, query for the next response
- if (tweetCache.isEmpty && hasNextRequest) {
- queryForNextBatch(
- desc.searchQuery,
- curLimit.min(TWITTER_API_BATCH_SIZE_MAX)
- )
- }
-
- // if the request is emtpy, it indicates no more tweets, iterator
should stop
- if (tweetCache.isEmpty) {
- return null
- }
- val tweet: TweetData = tweetCache.remove(0)
-
- curLimit -= 1
-
- // if limit is 0, then no more requests should be sent
- if (curLimit == 0) {
- hasNextRequest = false
- }
-
- val user = userCache.get(tweet.getAuthorId)
-
- tweetDataToTuple(tweet, user, schema)
- }
- }
-
- private def queryForNextBatch(
- query: String,
- maxResults: Int
- ): Unit = {
- def enforceRateLimit(): Unit = {
- // Twitter limit 1 request per second and 300 calls in 15 minutes for V2
FullArchiveSearch
- // If request too frequently, twitter will force the client wait for 5
minutes.
- // Here we send at most 1 request per second to avoid hitting rate limit.
- val currentTime = System.currentTimeMillis()
-
- // using 1100 to avoid some edge cases
- if (currentTime - lastQueryTime < 1100) {
- Thread.sleep(currentTime - lastQueryTime)
- }
- lastQueryTime = System.currentTimeMillis()
- }
-
- val params = AdditionalParameters
- .builder()
- .maxResults(maxResults.max(TWITTER_API_BATCH_SIZE_MIN))
- .recursiveCall(false)
- .nextToken(nextToken)
- .build()
-
- // There is bug in the library twittered that it returns null although
there exists
- // more pages.
- // Below is a temporary patch to make sure the query stops when there are
actually
- // no more pages. The strategy is to repeat the last request multiple
times to ensure
- // it returns the nextToken as null. The solution is not ideal but should
do job in
- // the most cases.
- // TODO: replace with newer version library twittered when the bug is
fixed.
- var response: TweetList = null
- var retry = 2
- do {
- enforceRateLimit()
- response = twitterClient.searchTweets(query, params)
- retry -= 1
-
- if (response == null || response.getMeta == null) {
- // Error in request, result in null responses
- throw new RuntimeException("error in requesting Twitter API, please
check your query.")
-
- }
- } while (response.getMeta.getNextToken == null && retry > 0)
-
- nextToken = response.getMeta.getNextToken
-
- tweetCache =
- if (response != null && response.getData != null)
response.getData.asScala else ListBuffer()
-
- userCache =
- if (response != null && response.getIncludes != null &&
response.getIncludes.getUsers != null)
- response.getIncludes.getUsers.asScala
- .map((userData: UserData) => userData.getId -> userData)
- .toMap
- else Map()
-
- // when there is no more pages left, no need to request any more
- hasNextRequest = nextToken != null
-
- }
-
+ override def produceTuple(): Iterator[TupleLike] = Iterator.empty
}
diff --git
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterUtils.scala
b/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterUtils.scala
deleted file mode 100644
index a1fc6dd7d6..0000000000
---
a/common/workflow-operator/src/main/scala/org/apache/texera/amber/operator/source/apis/twitter/v2/TwitterUtils.scala
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.texera.amber.operator.source.apis.twitter.v2
-
-import io.github.redouane59.twitter.dto.tweet.TweetV2.TweetData
-import io.github.redouane59.twitter.dto.user.UserV2.UserData
-import org.apache.texera.amber.core.tuple.{Attribute, AttributeTypeUtils,
Schema, Tuple}
-
-import java.time.format.DateTimeFormatter
-import java.time.{ZoneId, ZoneOffset}
-import scala.jdk.CollectionConverters.IterableHasAsScala
-
-object TwitterUtils {
-
- def tweetDataToTuple(tweetData: TweetData, user: Option[UserData],
tweetSchema: Schema): Tuple = {
- val fields = AttributeTypeUtils.parseFields(
- Array[Any](
- tweetData.getId,
- tweetData.getText,
- // given the fact that the redouane59/twittered library is using
LocalDateTime as the API parameter,
- // we have to fix it to UTC time zone to normalize the time.
- tweetData.getCreatedAt
- .atZone(ZoneId.systemDefault())
- .withZoneSameInstant(ZoneId.of("UTC"))
- .toLocalDateTime
- .atOffset(ZoneOffset.UTC)
- .format(DateTimeFormatter.ISO_DATE_TIME),
- tweetData.getLang,
- tweetData.getTweetType.toString,
- // TODO: add actual geo related information
- Option(tweetData.getGeo).map(_.getPlaceId).orNull,
- Option(tweetData.getGeo).map(_.getCoordinates).orNull,
- tweetData.getInReplyToStatusId,
- tweetData.getInReplyToUserId,
- java.lang.Long.valueOf(tweetData.getLikeCount),
- java.lang.Long.valueOf(tweetData.getQuoteCount),
- java.lang.Long.valueOf(tweetData.getReplyCount),
- java.lang.Long.valueOf(tweetData.getRetweetCount),
- Option(tweetData.getEntities)
- .map(e => Option(e.getHashtags).map(_.asScala.map(x =>
x.getText).mkString(",")).orNull)
- .orNull,
- Option(tweetData.getEntities)
- .map(e => Option(e.getSymbols).map(_.asScala.map(x =>
x.getText).mkString(",")).orNull)
- .orNull,
- Option(tweetData.getEntities)
- .map(e =>
- Option(e.getUrls).map(_.asScala.map(x =>
x.getExpandedUrl).mkString(",")).orNull
- )
- .orNull,
- Option(tweetData.getEntities)
- .map(e =>
- Option(e.getUserMentions).map(_.asScala.map(x =>
x.getText).mkString(",")).orNull
- )
- .orNull,
- user.get.getId,
- user.get.getCreatedAt,
- user.get.getName,
- user.get.getDisplayedName,
- user.get.getLang,
- user.get.getDescription,
- Option(user.get.getPublicMetrics)
- .map(u => java.lang.Long.valueOf(u.getFollowersCount))
- .orNull,
- Option(user.get.getPublicMetrics)
- .map(u => java.lang.Long.valueOf(u.getFollowingCount))
- .orNull,
- Option(user.get.getPublicMetrics)
- .map(u => java.lang.Long.valueOf(u.getTweetCount))
- .orNull,
- Option(user.get.getPublicMetrics)
- .map(u => java.lang.Long.valueOf(u.getListedCount))
- .orNull,
- user.get.getLocation,
- user.get.getUrl,
- user.get.getProfileImageUrl,
- user.get.getPinnedTweetId,
- Boolean.box(user.get.isProtectedAccount),
- Boolean.box(user.get.isVerified)
- ),
- tweetSchema.getAttributes
- .map((attribute: Attribute) => {
- attribute.getType
- })
- .toArray
- )
- Tuple.builder(tweetSchema).addSequentially(fields).build()
- }
-}