This is an automated email from the ASF dual-hosted git repository.
bertty pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-wayang-website.git
The following commit(s) were added to refs/heads/main by this push:
new 44bc7ae Fix wrong encoding of Java Wordcount sample
new 447a1b2 Merge pull request #5 from dominikriemer/main
44bc7ae is described below
commit 44bc7ae9bfe12008014fcad01e6a430b07145c42
Author: Dominik Riemer <[email protected]>
AuthorDate: Sat Oct 23 15:22:17 2021 +0200
Fix wrong encoding of Java Wordcount sample
---
documentation.md | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/documentation.md b/documentation.md
index 4837446..8bac9d2 100644
--- a/documentation.md
+++ b/documentation.md
@@ -188,43 +188,41 @@ public class WordcountJava {
public static void main(String[] args){
// Settings
- String inputUrl = "file:/tmp.txt";
+ String inputUrl = "file:/tmp.txt";
// Get a plan builder.
WayangContext wayangContext = new WayangContext(new Configuration())
.withPlugin(Java.basicPlugin())
.withPlugin(Spark.basicPlugin());
JavaPlanBuilder planBuilder = new JavaPlanBuilder(wayangContext)
- .withJobName(String.format("WordCount (%s)",
inputUrl))
+ .withJobName(String.format("WordCount (%s)", inputUrl))
.withUdfJarOf(WordcountJava.class);
// Start building the Apache WayangPlan.
- Collection<Tuple2<String, Integer>> wordcounts =
planBuilder
+ Collection<Tuple2<String, Integer>> wordcounts = planBuilder
// Read the text file.
- .readTextFile(inputUrl).withName("Load file")
+ .readTextFile(inputUrl).withName("Load file")
// Split each line by non-word characters.
- .flatMap(line ->
Arrays.asList(line.split("\\W+")))
+ .flatMap(line -> Arrays.asList(line.split("\\W+")))
.withSelectivity(10, 100, 0.9)
- .withName("Split words")
+ .withName("Split words")
// Filter empty tokens.
- .filter(token -> !token.isEmpty())
+ .filter(token -> !token.isEmpty())
.withSelectivity(0.99, 0.99, 0.99)
- .withName("Filter empty words")
+ .withName("Filter empty words")
// Attach counter to each word.
- .map(word -> new Tuple2<>(word.toLowerCase(),
1)).withName("To lower case, add counter")
+ .map(word -> new Tuple2<>(word.toLowerCase(), 1)).withName("To lower case, add counter")
// Sum up counters for every word.
.reduceByKey(
Tuple2::getField0,
- (t1, t2) -> new Tuple2<>(t1.getField0(),
t1.getField1() + t2.getField1())
+ (t1, t2) -> new Tuple2<>(t1.getField0(), t1.getField1() + t2.getField1())
)
- .withCardinalityEstimator(new DefaultCardinalityEstimator(0.9,
1, false, in -> Math.round(0.01 </li>
-<li>
-in[0])))
- .withName("Add counters")
+ .withCardinalityEstimator(new DefaultCardinalityEstimator(0.9, 1, false, in -> Math.round(0.01 * in[0])))
+ .withName("Add counters")
// Execute the plan and collect the results.
.collect();