[
https://issues.apache.org/jira/browse/BEAM-6079?focusedWorklogId=173681&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-173681
]
ASF GitHub Bot logged work on BEAM-6079:
----------------------------------------
Author: ASF GitHub Bot
Created on: 10/Dec/18 16:31
Start Date: 10/Dec/18 16:31
Worklog Time Spent: 10m
Work Description: iemejia closed pull request #7064: [BEAM-6079] Add
ability for CassandraIO to delete data
URL: https://github.com/apache/beam/pull/7064
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java
b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java
index db073d76b912..838dd820e879 100644
---
a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java
+++
b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraIO.java
@@ -88,9 +88,16 @@ private CassandraIO() {}
.build();
}
- /** Provide a {@link Write} {@link PTransform} to write data to a Cassandra
database. */
- public static <T> Write<T> write() {
- return new AutoValue_CassandraIO_Write.Builder<T>()
+ /** Provide a {@link Mutate} {@link PTransform} to write data to a Cassandra
database. */
+ public static <T> Mutate<T> write() {
+ return Mutate.<T>builder(MutationType.WRITE)
+ .setCassandraService(new CassandraServiceImpl<>())
+ .build();
+ }
+
+ /** Provide a {@link Mutate} {@link PTransform} to delete data to a
Cassandra database. */
+ public static <T> Mutate<T> delete() {
+ return Mutate.<T>builder(MutationType.DELETE)
.setCassandraService(new CassandraServiceImpl<>())
.build();
}
@@ -313,12 +320,18 @@ public void populateDisplayData(DisplayData.Builder
builder) {
}
}
+ /** Specify the mutation type: either write or delete. */
+ public enum MutationType {
+ WRITE,
+ DELETE
+ }
+
/**
- * A {@link PTransform} to write into Apache Cassandra. See {@link
CassandraIO} for details on
+ * A {@link PTransform} to mutate into Apache Cassandra. See {@link
CassandraIO} for details on
* usage and configuration.
*/
@AutoValue
- public abstract static class Write<T> extends PTransform<PCollection<T>,
PDone> {
+ public abstract static class Mutate<T> extends PTransform<PCollection<T>,
PDone> {
@Nullable
abstract List<String> hosts();
@@ -346,31 +359,48 @@ public void populateDisplayData(DisplayData.Builder
builder) {
@Nullable
abstract CassandraService<T> cassandraService();
+ abstract MutationType mutationType();
+
abstract Builder<T> builder();
+ static <T> Builder<T> builder(MutationType mutationType) {
+ return new
AutoValue_CassandraIO_Mutate.Builder<T>().setMutationType(mutationType);
+ }
+
/** Specify the Cassandra instance hosts where to write data. */
- public Write<T> withHosts(List<String> hosts) {
- checkArgument(hosts != null, "CassandraIO.write().withHosts(hosts)
called with null hosts");
+ public Mutate<T> withHosts(List<String> hosts) {
+ checkArgument(
+ hosts != null,
+ "CassandraIO." + getMutationTypeName() + "().withHosts(hosts) called
with null hosts");
checkArgument(
!hosts.isEmpty(),
- "CassandraIO.write().withHosts(hosts) called with empty " + "hosts
list");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withHosts(hosts) called with empty "
+ + "hosts list");
return builder().setHosts(hosts).build();
}
/** Specify the Cassandra instance port number where to write data. */
- public Write<T> withPort(int port) {
+ public Mutate<T> withPort(int port) {
checkArgument(
port > 0,
- "CassandraIO.write().withPort(port) called with invalid port " +
"number (%s)",
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withPort(port) called with invalid port "
+ + "number (%s)",
port);
return builder().setPort(port).build();
}
/** Specify the Cassandra keyspace where to write data. */
- public Write<T> withKeyspace(String keyspace) {
+ public Mutate<T> withKeyspace(String keyspace) {
checkArgument(
keyspace != null,
- "CassandraIO.write().withKeyspace(keyspace) called with " + "null
keyspace");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withKeyspace(keyspace) called with "
+ + "null keyspace");
return builder().setKeyspace(keyspace).build();
}
@@ -378,40 +408,55 @@ public void populateDisplayData(DisplayData.Builder
builder) {
* Specify the entity class in the input {@link PCollection}. The {@link
CassandraIO} will map
* this entity to the Cassandra table thanks to the annotations.
*/
- public Write<T> withEntity(Class<T> entity) {
+ public Mutate<T> withEntity(Class<T> entity) {
checkArgument(
- entity != null, "CassandraIO.write().withEntity(entity) called with
null " + "entity");
+ entity != null,
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withEntity(entity) called with null "
+ + "entity");
return builder().setEntity(entity).build();
}
/** Specify the username used for authentication. */
- public Write<T> withUsername(String username) {
+ public Mutate<T> withUsername(String username) {
checkArgument(
username != null,
- "CassandraIO.write().withUsername(username) called with " + "null
username");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withUsername(username) called with "
+ + "null username");
return builder().setUsername(username).build();
}
/** Specify the password used for authentication. */
- public Write<T> withPassword(String password) {
+ public Mutate<T> withPassword(String password) {
checkArgument(
password != null,
- "CassandraIO.write().withPassword(password) called with " + "null
password");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withPassword(password) called with "
+ + "null password");
return builder().setPassword(password).build();
}
/** Specify the local DC used by the load balancing policy. */
- public Write<T> withLocalDc(String localDc) {
+ public Mutate<T> withLocalDc(String localDc) {
checkArgument(
localDc != null,
- "CassandraIO.write().withLocalDc(localDc) called with null" + "
localDc");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withLocalDc(localDc) called with null"
+ + " localDc");
return builder().setLocalDc(localDc).build();
}
- public Write<T> withConsistencyLevel(String consistencyLevel) {
+ public Mutate<T> withConsistencyLevel(String consistencyLevel) {
checkArgument(
consistencyLevel != null,
- "CassandraIO.write().withConsistencyLevel"
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withConsistencyLevel"
+ "(consistencyLevel) called with null consistencyLevel");
return builder().setConsistencyLevel(consistencyLevel).build();
}
@@ -419,10 +464,13 @@ public void populateDisplayData(DisplayData.Builder
builder) {
/**
* Specify the {@link CassandraService} used to connect and write into the
Cassandra database.
*/
- public Write<T> withCassandraService(CassandraService<T> cassandraService)
{
+ public Mutate<T> withCassandraService(CassandraService<T>
cassandraService) {
checkArgument(
cassandraService != null,
- "CassandraIO.write().withCassandraService" + "(service) called with
null service");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "().withCassandraService"
+ + "(service) called with null service");
return builder().setCassandraService(cassandraService).build();
}
@@ -430,27 +478,47 @@ public void populateDisplayData(DisplayData.Builder
builder) {
public void validate(PipelineOptions pipelineOptions) {
checkState(
hosts() != null || cassandraService() != null,
- "CassandraIO.write() requires a list of hosts to be set via
withHosts(hosts) or a "
+ "CassandraIO."
+ + getMutationTypeName()
+ + "() requires a list of hosts to be set via withHosts(hosts) or
a "
+ "Cassandra service to be set via
withCassandraService(service)");
checkState(
port() != null || cassandraService() != null,
- "CassandraIO.write() requires a "
+ "CassandraIO."
+ + getMutationTypeName()
+ + "() requires a "
+ "valid port number to be set via withPort(port) or a Cassandra
service to be set via "
+ "withCassandraService(service)");
checkState(
keyspace() != null,
- "CassandraIO.write() requires a keyspace to be set via " +
"withKeyspace(keyspace)");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "() requires a keyspace to be set via "
+ + "withKeyspace(keyspace)");
checkState(
entity() != null,
- "CassandraIO.write() requires an entity to be set via " +
"withEntity(entity)");
+ "CassandraIO."
+ + getMutationTypeName()
+ + "() requires an entity to be set via "
+ + "withEntity(entity)");
}
@Override
public PDone expand(PCollection<T> input) {
- input.apply(ParDo.of(new WriteFn<>(this)));
+ if (mutationType() == MutationType.DELETE) {
+ input.apply(ParDo.of(new DeleteFn<T>(this)));
+ } else {
+ input.apply(ParDo.of(new WriteFn<T>(this)));
+ }
return PDone.in(input.getPipeline());
}
+ private String getMutationTypeName() {
+ return mutationType() == null
+ ? MutationType.WRITE.name().toLowerCase()
+ : mutationType().name().toLowerCase();
+ }
+
@AutoValue.Builder
abstract static class Builder<T> {
abstract Builder<T> setHosts(List<String> hosts);
@@ -471,15 +539,17 @@ public PDone expand(PCollection<T> input) {
abstract Builder<T> setCassandraService(CassandraService<T>
cassandraService);
- abstract Write<T> build();
+ abstract Builder<T> setMutationType(MutationType mutationType);
+
+ abstract Mutate<T> build();
}
}
private static class WriteFn<T> extends DoFn<T, Void> {
- private final Write<T> spec;
- private CassandraService.Writer writer;
+ private final Mutate<T> spec;
+ private CassandraService.Writer<T> writer;
- WriteFn(Write<T> spec) {
+ WriteFn(Mutate<T> spec) {
this.spec = spec;
}
@@ -499,4 +569,29 @@ public void teardown() throws Exception {
writer = null;
}
}
+
+ private static class DeleteFn<T> extends DoFn<T, Void> {
+ private final Mutate<T> spec;
+ private CassandraService.Deleter<T> deleter;
+
+ DeleteFn(Mutate<T> spec) {
+ this.spec = spec;
+ }
+
+ @Setup
+ public void setup() {
+ deleter = spec.cassandraService().createDeleter(spec);
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext c) throws ExecutionException,
InterruptedException {
+ deleter.delete(c.element());
+ }
+
+ @Teardown
+ public void teardown() throws Exception {
+ deleter.close();
+ deleter = null;
+ }
+ }
}
diff --git
a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraService.java
b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraService.java
index be4a25705150..92bd261dba97 100644
---
a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraService.java
+++
b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraService.java
@@ -38,7 +38,7 @@
List<BoundedSource<T>> split(CassandraIO.Read<T> spec, long
desiredBundleSizeBytes);
/** Create a {@link Writer} that writes entities into the Cassandra
instance. */
- Writer createWriter(CassandraIO.Write<T> spec);
+ Writer createWriter(CassandraIO.Mutate<T> spec);
/** Writer for an entity. */
interface Writer<T> extends AutoCloseable {
@@ -48,4 +48,16 @@
*/
void write(T entity) throws ExecutionException, InterruptedException;
}
+
+ /** Create a {@link Writer} that writes entities into the Cassandra
instance. */
+ Deleter createDeleter(CassandraIO.Mutate<T> spec);
+
+ /** Deleter for an entity. */
+ interface Deleter<T> extends AutoCloseable {
+ /**
+ * This method should be synchronous. It means you have to be sure that
the entity is fully
+ * stored (and committed) into the Cassandra instance when you exit from
this method.
+ */
+ void delete(T entity) throws ExecutionException, InterruptedException;
+ }
}
diff --git
a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraServiceImpl.java
b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraServiceImpl.java
index 7a577e93ae02..9414b0e05438 100644
---
a/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraServiceImpl.java
+++
b/sdks/java/io/cassandra/src/main/java/org/apache/beam/sdk/io/cassandra/CassandraServiceImpl.java
@@ -42,6 +42,7 @@
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.ExecutionException;
+import java.util.function.BiFunction;
import java.util.stream.Collectors;
import javax.annotation.Nullable;
import org.apache.beam.sdk.io.BoundedSource;
@@ -394,21 +395,39 @@ public TokenRange(
}
/** Writer storing an entity into Apache Cassandra database. */
- protected class WriterImpl implements Writer<T> {
+ protected class WriterImpl extends MutatorImpl implements Writer<T> {
+
+ WriterImpl(CassandraIO.Mutate<T> spec) {
+ super(spec, Mapper::saveAsync, "writes");
+ }
+
+ @Override
+ public void write(T entity) throws ExecutionException,
InterruptedException {
+ mutate(entity);
+ }
+ }
+
+ /** Mutator allowing to do side effects into Apache Cassandra database. */
+ protected abstract class MutatorImpl {
/**
* The threshold of 100 concurrent async queries is a heuristic commonly
used by the Apache
* Cassandra community. There is no real gain to expect in tuning this
value.
*/
private static final int CONCURRENT_ASYNC_QUERIES = 100;
- private final CassandraIO.Write<T> spec;
+ private final CassandraIO.Mutate<T> spec;
private final Cluster cluster;
private final Session session;
private final MappingManager mappingManager;
- private List<ListenableFuture<Void>> writeFutures;
-
- WriterImpl(CassandraIO.Write<T> spec) {
+ private List<ListenableFuture<Void>> mutateFutures;
+ private BiFunction<Mapper<T>, T, ListenableFuture<Void>> mutator;
+ private String operationName;
+
+ MutatorImpl(
+ CassandraIO.Mutate<T> spec,
+ BiFunction<Mapper<T>, T, ListenableFuture<Void>> mutator,
+ String operationName) {
this.spec = spec;
this.cluster =
getCluster(
@@ -420,34 +439,35 @@ public TokenRange(
spec.consistencyLevel());
this.session = cluster.connect(spec.keyspace());
this.mappingManager = new MappingManager(session);
- this.writeFutures = new ArrayList<>();
+ this.mutateFutures = new ArrayList<>();
+ this.mutator = mutator;
+ this.operationName = operationName;
}
/**
- * Write the entity to the Cassandra instance, using {@link Mapper}
obtained with the {@link
+ * Mutate the entity to the Cassandra instance, using {@link Mapper}
obtained with the {@link
* MappingManager}. This method uses {@link Mapper#saveAsync(Object)}
method, which is
* asynchronous. Beam will wait for all futures to complete, to guarantee
all writes have
* succeeded.
*/
- @Override
- public void write(T entity) throws ExecutionException,
InterruptedException {
+ public void mutate(T entity) throws ExecutionException,
InterruptedException {
Mapper<T> mapper = (Mapper<T>) mappingManager.mapper(entity.getClass());
- this.writeFutures.add(mapper.saveAsync(entity));
- if (this.writeFutures.size() == CONCURRENT_ASYNC_QUERIES) {
+ this.mutateFutures.add(mutator.apply(mapper, entity));
+ if (this.mutateFutures.size() == CONCURRENT_ASYNC_QUERIES) {
// We reached the max number of allowed in flight queries.
// Write methods are synchronous in Beam as stated by the
CassandraService interface,
// so we wait for each async query to return before exiting.
LOG.debug(
- "Waiting for a batch of {} Cassandra writes to be executed...",
- CONCURRENT_ASYNC_QUERIES);
+ "Waiting for a batch of {} Cassandra {} to be executed...",
+ CONCURRENT_ASYNC_QUERIES,
+ operationName);
waitForFuturesToFinish();
- this.writeFutures = new ArrayList<>();
+ this.mutateFutures = new ArrayList<>();
}
}
- @Override
public void close() throws ExecutionException, InterruptedException {
- if (this.writeFutures.size() > 0) {
+ if (this.mutateFutures.size() > 0) {
// Waiting for the last in flight async queries to return before
finishing the bundle.
waitForFuturesToFinish();
}
@@ -461,14 +481,32 @@ public void close() throws ExecutionException,
InterruptedException {
}
private void waitForFuturesToFinish() throws ExecutionException,
InterruptedException {
- for (ListenableFuture<Void> future : writeFutures) {
+ for (ListenableFuture<Void> future : mutateFutures) {
future.get();
}
}
}
@Override
- public Writer createWriter(CassandraIO.Write<T> spec) {
+ public Writer createWriter(CassandraIO.Mutate<T> spec) {
return new WriterImpl(spec);
}
+
+ /** Deleter storing an entity into Apache Cassandra database. */
+ protected class DeleterImpl extends MutatorImpl implements Deleter<T> {
+
+ DeleterImpl(CassandraIO.Mutate<T> spec) {
+ super(spec, (tMapper, t) -> tMapper.deleteAsync(t), "deletes");
+ }
+
+ @Override
+ public void delete(T entity) throws ExecutionException,
InterruptedException {
+ mutate(entity);
+ }
+ }
+
+ @Override
+ public Deleter createDeleter(CassandraIO.Mutate<T> spec) {
+ return new DeleterImpl(spec);
+ }
}
diff --git
a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
index 5e98cef227ab..4a048c07f4e2 100644
---
a/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
+++
b/sdks/java/io/cassandra/src/test/java/org/apache/beam/sdk/io/cassandra/CassandraIOTest.java
@@ -129,6 +129,32 @@ public void testWrite() {
}
}
+ @Test
+ public void testDelete() {
+ FakeCassandraService service = new FakeCassandraService();
+ service.load();
+
+ assertEquals(10000, service.getTable().size());
+
+ pipeline
+ .apply(
+ CassandraIO.<Scientist>read()
+ .withCassandraService(service)
+ .withKeyspace("beam")
+ .withTable("scientist")
+ .withCoder(SerializableCoder.of(Scientist.class))
+ .withEntity(Scientist.class))
+ .apply(
+ CassandraIO.<Scientist>delete()
+ .withCassandraService(service)
+ .withKeyspace("beam")
+ .withEntity(Scientist.class));
+
+ pipeline.run();
+
+ assertEquals(0, service.getTable().size());
+ }
+
/** A {@link CassandraService} implementation that stores the entity in
memory. */
private static class FakeCassandraService implements
CassandraService<Scientist> {
private static final Map<Integer, Scientist> table = new
ConcurrentHashMap<>();
@@ -241,9 +267,26 @@ public void close() {
}
@Override
- public FakeCassandraWriter createWriter(CassandraIO.Write<Scientist> spec)
{
+ public FakeCassandraWriter createWriter(CassandraIO.Mutate<Scientist>
spec) {
return new FakeCassandraWriter();
}
+
+ private static class FakeCassandraDeleter implements Deleter<Scientist> {
+ @Override
+ public void delete(Scientist scientist) {
+ table.remove(scientist.id);
+ }
+
+ @Override
+ public void close() {
+ // nothing to do
+ }
+ }
+
+ @Override
+ public FakeCassandraDeleter createDeleter(CassandraIO.Mutate<Scientist>
spec) {
+ return new FakeCassandraDeleter();
+ }
}
/** Simple Cassandra entity used in test. */
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 173681)
Time Spent: 1h (was: 50m)
> Add ability for CassandraIO to delete data
> ------------------------------------------
>
> Key: BEAM-6079
> URL: https://issues.apache.org/jira/browse/BEAM-6079
> Project: Beam
> Issue Type: Improvement
> Components: io-java-cassandra
> Reporter: Fabien Rousseau
> Assignee: Fabien Rousseau
> Priority: Minor
> Fix For: 2.10.0
>
> Time Spent: 1h
> Remaining Estimate: 0h
>
> Currently, it's possible to read & write data using CassandraIO.
> It would be nice to be able to delete using a CassandraIO.delete().
>
> I can provide a PR
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)