iemejia commented on a change in pull request #10546: [BEAM-9008] Add CassandraIO readAll method
URL: https://github.com/apache/beam/pull/10546#discussion_r405869911
##########
File path: sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java
##########
@@ -1170,4 +887,344 @@ private void waitForFuturesToFinish() throws ExecutionException, InterruptedExce
}
}
}
+
+ /**
+ * A {@link PTransform} to read data from Apache Cassandra. See {@link CassandraIO} for more
+ * information on usage and configuration.
+ */
+ @AutoValue
+ public abstract static class ReadAll<T> extends PTransform<PCollection<Read<T>>, PCollection<T>> {
+
+ @Nullable
+ abstract ValueProvider<List<String>> hosts();
+
+ @Nullable
+ abstract ValueProvider<String> query();
+
+ @Nullable
+ abstract ValueProvider<Integer> port();
+
+ @Nullable
+ abstract ValueProvider<String> keyspace();
+
+ @Nullable
+ abstract ValueProvider<String> table();
+
+ @Nullable
+ abstract Class<T> entity();
+
+ @Nullable
+ abstract Coder<T> coder();
+
+ @Nullable
+ abstract ValueProvider<String> username();
+
+ @Nullable
+ abstract ValueProvider<String> password();
+
+ @Nullable
+ abstract ValueProvider<String> localDc();
+
+ @Nullable
+ abstract ValueProvider<String> consistencyLevel();
+
+ @Nullable
+ abstract ValueProvider<Integer> splitCount();
+
+ @Nullable
+ abstract SerializableFunction<Session, Mapper> mapperFactoryFn();
+
+ @Nullable
+ abstract SerializableFunction<RingRange, Integer> groupingFn();
+
+ abstract Builder<T> builder();
+
+ /** Specify the hosts of the Apache Cassandra instances. */
+ public ReadAll<T> withHosts(List<String> hosts) {
+ checkArgument(hosts != null, "hosts can not be null");
+ checkArgument(!hosts.isEmpty(), "hosts can not be empty");
+ return withHosts(ValueProvider.StaticValueProvider.of(hosts));
+ }
+
+ /** Specify the hosts of the Apache Cassandra instances. */
+ public ReadAll<T> withHosts(ValueProvider<List<String>> hosts) {
+ return builder().setHosts(hosts).build();
+ }
+
+ /** Specify the port number of the Apache Cassandra instances. */
+ public ReadAll<T> withPort(int port) {
+ checkArgument(port > 0, "port must be > 0, but was: %s", port);
+ return withPort(ValueProvider.StaticValueProvider.of(port));
+ }
+
+ /** Specify the port number of the Apache Cassandra instances. */
+ public ReadAll<T> withPort(ValueProvider<Integer> port) {
+ return builder().setPort(port).build();
+ }
+
+ /** Specify the Cassandra keyspace from which to read data. */
+ public ReadAll<T> withKeyspace(String keyspace) {
+ checkArgument(keyspace != null, "keyspace can not be null");
+ return withKeyspace(ValueProvider.StaticValueProvider.of(keyspace));
+ }
+
+ /** Specify the Cassandra keyspace from which to read data. */
+ public ReadAll<T> withKeyspace(ValueProvider<String> keyspace) {
+ return builder().setKeyspace(keyspace).build();
+ }
+
+ /** Specify the Cassandra table from which to read data. */
+ public ReadAll<T> withTable(String table) {
+ checkArgument(table != null, "table can not be null");
+ return withTable(ValueProvider.StaticValueProvider.of(table));
+ }
+
+ /** Specify the Cassandra table from which to read data. */
+ public ReadAll<T> withTable(ValueProvider<String> table) {
+ return builder().setTable(table).build();
+ }
+
+ /** Specify the query to read data. */
+ public ReadAll<T> withQuery(String query) {
+ checkArgument(query != null && query.length() > 0, "query can not be null or empty");
+ return withQuery(ValueProvider.StaticValueProvider.of(query));
+ }
+
+ /** Specify the query to read data. */
+ public ReadAll<T> withQuery(ValueProvider<String> query) {
+ return builder().setQuery(query).build();
+ }
+
+ /**
+ * Specify the entity class (annotated POJO). The {@link CassandraIO} will read the data and
+ * convert it into entity instances. The {@link PCollection} resulting from the read will
+ * contain entity elements.
+ */
+ public ReadAll<T> withEntity(Class<T> entity) {
+ checkArgument(entity != null, "entity can not be null");
+ return builder().setEntity(entity).build();
+ }
+
+ /** Specify the {@link Coder} used to serialize the entity in the {@link PCollection}. */
+ public ReadAll<T> withCoder(Coder<T> coder) {
+ checkArgument(coder != null, "coder can not be null");
+ return builder().setCoder(coder).build();
+ }
+
+ /** Specify the username for authentication. */
+ public ReadAll<T> withUsername(String username) {
+ checkArgument(username != null, "username can not be null");
+ return withUsername(ValueProvider.StaticValueProvider.of(username));
+ }
+
+ /** Specify the username for authentication. */
+ public ReadAll<T> withUsername(ValueProvider<String> username) {
+ return builder().setUsername(username).build();
+ }
+
+ /** Specify the password used for authentication. */
+ public ReadAll<T> withPassword(String password) {
+ checkArgument(password != null, "password can not be null");
+ return withPassword(ValueProvider.StaticValueProvider.of(password));
+ }
+
+ /** Specify the password used for authentication. */
+ public ReadAll<T> withPassword(ValueProvider<String> password) {
+ return builder().setPassword(password).build();
+ }
+
+ /** Specify the local DC used for load balancing. */
+ public ReadAll<T> withLocalDc(String localDc) {
+ checkArgument(localDc != null, "localDc can not be null");
+ return withLocalDc(ValueProvider.StaticValueProvider.of(localDc));
+ }
+
+ /** Specify the local DC used for load balancing. */
+ public ReadAll<T> withLocalDc(ValueProvider<String> localDc) {
+ return builder().setLocalDc(localDc).build();
+ }
+
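+ /** Specify the consistency level for the request (e.g. ONE, LOCAL_ONE, LOCAL_QUORUM, etc). */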
+ public ReadAll<T> withConsistencyLevel(String consistencyLevel) {
+ checkArgument(consistencyLevel != null, "consistencyLevel can not be null");
+ return withConsistencyLevel(ValueProvider.StaticValueProvider.of(consistencyLevel));
+ }
+
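+ /** Specify the consistency level for the request (e.g. ONE, LOCAL_ONE, LOCAL_QUORUM, etc). */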
+ public ReadAll<T> withConsistencyLevel(ValueProvider<String> consistencyLevel) {
+ return builder().setConsistencyLevel(consistencyLevel).build();
+ }
+
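+ /** Specify a function used to group {@link RingRange}s when generating splits. */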
+ public ReadAll<T> withGroupingFn(SerializableFunction<RingRange, Integer> groupingFunction) {
+ return builder().setGroupingFn(groupingFunction).build();
+ }
+
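+ /** Specify the number of splits the read should be divided into. */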
+ public ReadAll<T> withSplitCount(ValueProvider<Integer> splitCount) {
+ return builder().setSplitCount(splitCount).build();
+ }
+
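+ /** Specify the number of splits the read should be divided into. */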
+ public ReadAll<T> withSplitCount(Integer splitCount) {
+ checkArgument(splitCount != null, "splitCount can not be null");
+ return withSplitCount(ValueProvider.StaticValueProvider.of(splitCount));
+ }
+
+ /**
+ * A factory to create a specific {@link Mapper} for a given Cassandra Session. This is useful
+ * to provide mappers that don't rely on Cassandra annotated objects.
+ */
+ public ReadAll<T> withMapperFactoryFn(SerializableFunction<Session, Mapper> mapperFactory) {
+ checkArgument(mapperFactory != null, "mapperFactoryFn can not be null");
+ return builder().setMapperFactoryFn(mapperFactory).build();
+ }
+
+ @Override
+ public PCollection<T> expand(PCollection<Read<T>> input) {
+ checkArgument(hosts() != null && port() != null, "withHosts() and withPort() are required");
+ checkArgument(keyspace() != null, "withKeyspace() is required");
+ checkArgument(table() != null, "withTable() is required");
+ checkArgument(entity() != null, "withEntity() is required");
+ checkArgument(coder() != null, "withCoder() is required");
+ checkArgument(groupingFn() != null || splitCount() != null, "withGroupingFn() or withSplitCount() is required");
+ try (Cluster cluster =
Review comment:
The key point here is to apply the split function to every read and produce subsequent reads with RingRanges, which are then Reshuffled and passed to a ParDo with the ReadFn function.
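
For reference, a minimal sketch of that shape (SplitFn, ReadFn, withRingRange
and computeRingRanges below are illustrative placeholders, not necessarily the
names used in this PR):

    // (imports: org.apache.beam.sdk.transforms.{DoFn, ParDo, Reshuffle},
    //  org.apache.beam.sdk.values.PCollection)
    @Override
    public PCollection<T> expand(PCollection<Read<T>> input) {
      return input
          // Apply the split function to every incoming Read and emit one
          // Read per resulting RingRange.
          .apply("Split", ParDo.of(new SplitFn<T>()))
          // Redistribute the split Reads across workers so the subsequent
          // Cassandra queries are balanced and not fused with the splitting.
          .apply("Reshuffle", Reshuffle.viaRandomKey())
          // Execute each split Read against Cassandra and emit entities.
          .apply("Read", ParDo.of(new ReadFn<T>()))
          .setCoder(coder());
    }

    // Hypothetical DoFn that turns one Read into one Read per RingRange.
    static class SplitFn<T> extends DoFn<Read<T>, Read<T>> {
      @ProcessElement
      public void process(@Element Read<T> read, OutputReceiver<Read<T>> out) {
        // for (RingRange rr : computeRingRanges(read)) {
        //   out.output(read.withRingRange(rr));
        // }
      }
    }

The Reshuffle step is what lets the runner rebalance the split Reads across
workers before the actual queries run, instead of reading every range on the
worker that produced the split.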
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services