Repository: incubator-beam Updated Branches: refs/heads/master 87c5839dc -> 86d222aab
Delete DatastoreWordCount This is the kind of example we do not need to have in Beam. It's just WordCount with a different data source. Also remove a no-longer-needed declared dependency. Project: http://git-wip-us.apache.org/repos/asf/incubator-beam/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-beam/commit/d8ca34cc Tree: http://git-wip-us.apache.org/repos/asf/incubator-beam/tree/d8ca34cc Diff: http://git-wip-us.apache.org/repos/asf/incubator-beam/diff/d8ca34cc Branch: refs/heads/master Commit: d8ca34ccad3ba0717b3a0381936fc9bc11b3bbad Parents: 87c5839 Author: Daniel Halperin <dhalp...@users.noreply.github.com> Authored: Tue Oct 4 19:23:54 2016 -0700 Committer: Dan Halperin <dhalp...@google.com> Committed: Fri Oct 14 10:44:31 2016 -0700 ---------------------------------------------------------------------- examples/java/pom.xml | 5 - .../examples/cookbook/DatastoreWordCount.java | 261 ------------------- 2 files changed, 266 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d8ca34cc/examples/java/pom.xml ---------------------------------------------------------------------- diff --git a/examples/java/pom.xml b/examples/java/pom.xml index d18f959..37cb15a 100644 --- a/examples/java/pom.xml +++ b/examples/java/pom.xml @@ -466,11 +466,6 @@ </dependency> <dependency> - <groupId>com.google.code.findbugs</groupId> - <artifactId>annotations</artifactId> - </dependency> - - <dependency> <groupId>com.google.cloud.datastore</groupId> <artifactId>datastore-v1-proto-client</artifactId> </dependency> http://git-wip-us.apache.org/repos/asf/incubator-beam/blob/d8ca34cc/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java ---------------------------------------------------------------------- diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java deleted file mode 100644 index c0066e6..0000000 --- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/DatastoreWordCount.java +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.examples.cookbook; - -import static com.google.datastore.v1.client.DatastoreHelper.getString; -import static com.google.datastore.v1.client.DatastoreHelper.makeFilter; -import static com.google.datastore.v1.client.DatastoreHelper.makeKey; -import static com.google.datastore.v1.client.DatastoreHelper.makeValue; - -import com.google.datastore.v1.Entity; -import com.google.datastore.v1.Key; -import com.google.datastore.v1.PropertyFilter; -import com.google.datastore.v1.Query; -import com.google.datastore.v1.Value; -import java.util.Map; -import java.util.UUID; -import javax.annotation.Nullable; -import org.apache.beam.examples.WordCount; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.gcp.datastore.DatastoreIO; -import org.apache.beam.sdk.io.gcp.datastore.DatastoreV1; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.options.Validation; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; - -/** - * A WordCount example using DatastoreIO. - * - * <p>This example shows how to use DatastoreIO to read from Cloud Datastore and - * write the results to Cloud Storage. Note that this example will write - * data to Cloud Datastore, which may incur charge for Cloud Datastore operations. - * - * <p>To run this example, users need to use gcloud to get credential for Cloud Datastore: - * <pre>{@code - * $ gcloud auth login - * }</pre> - * - * <p>To run this pipeline locally, the following options must be provided: - * <pre>{@code - * --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PATH] - * }</pre> - * - * <p>To change the runner, specify: - * <pre>{@code - * --runner=YOUR_SELECTED_RUNNER - * } - * </pre> - * See examples/java/README.md for instructions about how to configure different runners. - * - * <p><b>Note:</b> this example creates entities with <i>Ancestor keys</i> to ensure that all - * entities created are in the same entity group. Similarly, the query used to read from the Cloud - * Datastore uses an <i>Ancestor filter</i>. Ancestors are used to ensure strongly consistent - * results in Cloud Datastore. For more information, see the Cloud Datastore documentation on - * <a href="https://cloud.google.com/datastore/docs/concepts/structuring_for_strong_consistency"> - * Structing Data for Strong Consistency</a>. - */ -public class DatastoreWordCount { - - /** - * A {@link DoFn} that gets the content of an entity (one line in a - * Shakespeare play) and converts it to a string. - */ - static class GetContentFn extends DoFn<Entity, String> { - @ProcessElement - public void processElement(ProcessContext c) { - Map<String, Value> props = c.element().getProperties(); - Value value = props.get("content"); - if (value != null) { - c.output(getString(value)); - } - } - } - - /** - * A helper function to create the ancestor key for all created and queried entities. - * - * <p>We use ancestor keys and ancestor queries for strong consistency. See - * {@link DatastoreWordCount} javadoc for more information. - */ - static Key makeAncestorKey(@Nullable String namespace, String kind) { - Key.Builder keyBuilder = makeKey(kind, "root"); - if (namespace != null) { - keyBuilder.getPartitionIdBuilder().setNamespaceId(namespace); - } - return keyBuilder.build(); - } - - /** - * A {@link DoFn} that creates an entity for every line in Shakespeare. - */ - static class CreateEntityFn extends DoFn<String, Entity> { - private final String namespace; - private final String kind; - private final Key ancestorKey; - - CreateEntityFn(String namespace, String kind) { - this.namespace = namespace; - this.kind = kind; - - // Build the ancestor key for all created entities once, including the namespace. - ancestorKey = makeAncestorKey(namespace, kind); - } - - public Entity makeEntity(String content) { - Entity.Builder entityBuilder = Entity.newBuilder(); - - // All created entities have the same ancestor Key. - Key.Builder keyBuilder = makeKey(ancestorKey, kind, UUID.randomUUID().toString()); - // NOTE: Namespace is not inherited between keys created with DatastoreHelper.makeKey, so - // we must set the namespace on keyBuilder. TODO: Once partitionId inheritance is added, - // we can simplify this code. - if (namespace != null) { - keyBuilder.getPartitionIdBuilder().setNamespaceId(namespace); - } - - entityBuilder.setKey(keyBuilder.build()); - entityBuilder.getMutableProperties().put("content", makeValue(content).build()); - return entityBuilder.build(); - } - - @ProcessElement - public void processElement(ProcessContext c) { - c.output(makeEntity(c.element())); - } - } - - /** - * Options supported by {@link DatastoreWordCount}. - * - * <p>Inherits standard configuration options. - */ - public static interface Options extends PipelineOptions { - @Description("Path of the file to read from and store to Cloud Datastore") - @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt") - String getInput(); - void setInput(String value); - - @Description("Path of the file to write to") - @Validation.Required - String getOutput(); - void setOutput(String value); - - @Description("Project ID to read from Cloud Datastore") - @Validation.Required - String getProject(); - void setProject(String value); - - @Description("Cloud Datastore Entity kind") - @Default.String("shakespeare-demo") - String getKind(); - void setKind(String value); - - @Description("Cloud Datastore Namespace") - String getNamespace(); - void setNamespace(@Nullable String value); - - @Description("Read an existing project, do not write first") - boolean isReadOnly(); - void setReadOnly(boolean value); - - @Description("Number of output shards") - @Default.Integer(0) // If the system should choose automatically. - int getNumShards(); - void setNumShards(int value); - } - - /** - * An example that creates a pipeline to populate DatastoreIO from a - * text input. Forces use of DirectRunner for local execution mode. - */ - public static void writeDataToDatastore(Options options) { - Pipeline p = Pipeline.create(options); - p.apply("ReadLines", TextIO.Read.from(options.getInput())) - .apply(ParDo.of(new CreateEntityFn(options.getNamespace(), options.getKind()))) - .apply(DatastoreIO.v1().write().withProjectId(options.getProject())); - - p.run(); - } - - /** - * Build a Cloud Datastore ancestor query for the specified {@link Options#getNamespace} and - * {@link Options#getKind}. - * - * <p>We use ancestor keys and ancestor queries for strong consistency. See - * {@link DatastoreWordCount} javadoc for more information. - * - * @see <a href="https://cloud.google.com/datastore/docs/concepts/queries#Datastore_Ancestor_filters">Ancestor filters</a> - */ - static Query makeAncestorKindQuery(Options options) { - Query.Builder q = Query.newBuilder(); - q.addKindBuilder().setName(options.getKind()); - q.setFilter(makeFilter( - "__key__", - PropertyFilter.Operator.HAS_ANCESTOR, - makeValue(makeAncestorKey(options.getNamespace(), options.getKind())))); - return q.build(); - } - - /** - * An example that creates a pipeline to do DatastoreIO.Read from Cloud Datastore. - */ - public static void readDataFromDatastore(Options options) { - Query query = makeAncestorKindQuery(options); - - // For Datastore sources, the read namespace can be set on the entire query. - DatastoreV1.Read read = DatastoreIO.v1().read() - .withProjectId(options.getProject()) - .withQuery(query) - .withNamespace(options.getNamespace()); - - Pipeline p = Pipeline.create(options); - p.apply("ReadShakespeareFromDatastore", read) - .apply("StringifyEntity", ParDo.of(new GetContentFn())) - .apply("CountWords", new WordCount.CountWords()) - .apply("PrintWordCount", MapElements.via(new WordCount.FormatAsTextFn())) - .apply("WriteLines", TextIO.Write.to(options.getOutput()) - .withNumShards(options.getNumShards())); - p.run(); - } - - /** - * An example to demo how to use {@link DatastoreIO}. - */ - public static void main(String args[]) { - Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); - - if (!options.isReadOnly()) { - // First example: write data to Datastore for reading later. - // - // NOTE: this write does not delete any existing Entities in the Datastore, so if run - // multiple times with the same output project, there may be duplicate entries. The - // Datastore Query tool in the Google Developers Console can be used to inspect or erase all - // entries with a particular namespace and/or kind. - DatastoreWordCount.writeDataToDatastore(options); - } - - // Second example: do parallel read from Datastore. - DatastoreWordCount.readDataFromDatastore(options); - } -}