This is an automated email from the ASF dual-hosted git repository.

mergebot-role pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/beam-site.git

commit c77047075a163d2c19fc999cf717dd659eae315c
Author: Mergebot <[email protected]>
AuthorDate: Tue May 1 10:45:03 2018 -0700

    Prepare repository for deployment.
---
 content/documentation/dsls/sql/index.html          |   1 +
 .../sdks/feature-comparison/index.html             |   1 +
 .../documentation/sdks/java-extensions/index.html  |   1 +
 .../index.html                                     | 147 ++++++++++++++-------
 content/documentation/sdks/java/index.html         |   3 +
 content/documentation/sdks/java/nexmark/index.html |   1 +
 .../documentation/sdks/python-custom-io/index.html |   1 +
 .../sdks/python-pipeline-dependencies/index.html   |   1 +
 .../sdks/python-type-safety/index.html             |   1 +
 content/documentation/sdks/python/index.html       |   1 +
 10 files changed, 112 insertions(+), 46 deletions(-)

diff --git a/content/documentation/dsls/sql/index.html 
b/content/documentation/dsls/sql/index.html
index ecaf7c6..2f03a80 100644
--- a/content/documentation/dsls/sql/index.html
+++ b/content/documentation/dsls/sql/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
diff --git a/content/documentation/sdks/feature-comparison/index.html 
b/content/documentation/sdks/feature-comparison/index.html
index f31a33d..0ed38ca 100644
--- a/content/documentation/sdks/feature-comparison/index.html
+++ b/content/documentation/sdks/feature-comparison/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
diff --git a/content/documentation/sdks/java-extensions/index.html 
b/content/documentation/sdks/java-extensions/index.html
index 880b726..4496ff3 100644
--- a/content/documentation/sdks/java-extensions/index.html
+++ b/content/documentation/sdks/java-extensions/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
diff --git a/content/documentation/sdks/java-extensions/index.html 
b/content/documentation/sdks/java-thirdparty/index.html
similarity index 67%
copy from content/documentation/sdks/java-extensions/index.html
copy to content/documentation/sdks/java-thirdparty/index.html
index 880b726..0967e4c 100644
--- a/content/documentation/sdks/java-extensions/index.html
+++ b/content/documentation/sdks/java-thirdparty/index.html
@@ -4,7 +4,7 @@
   <meta charset="utf-8">
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
   <meta name="viewport" content="width=device-width, initial-scale=1">
-  <title>Beam Java SDK Extensions</title>
+  <title>Beam 3rd Party Java Extensions</title>
   <meta name="description" content="Apache Beam is an open source, unified 
model and set of language-specific SDKs for defining and executing data 
processing workflows, and also data ingestion and integration flows, supporting 
Enterprise Integration Patterns (EIPs) and Domain Specific Languages (DSLs). 
Dataflow pipelines simplify the mechanics of large-scale batch and streaming 
data processing and can run on a number of runtimes like Apache Flink, Apache 
Spark, and Google Cloud Dataflow  [...]
 ">
   <link href="https://fonts.googleapis.com/css?family=Roboto:100,300,400" 
rel="stylesheet">
@@ -15,7 +15,7 @@
   <script src="/js/fix-menu.js"></script>
   <script src="/js/section-nav.js"></script>
   <script src="/js/page-nav.js"></script>
-  <link rel="canonical" 
href="https://beam.apache.org/documentation/sdks/java-extensions/" 
data-proofer-ignore>
+  <link rel="canonical" 
href="https://beam.apache.org/documentation/sdks/java-thirdparty/" 
data-proofer-ignore>
   <link rel="shortcut icon" type="image/x-icon" href="/images/favicon.ico">
   <link rel="alternate" type="application/rss+xml" title="Apache Beam" 
href="https://beam.apache.org/feed.xml">
   <script>
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
@@ -129,12 +130,22 @@
         
 
 <ul class="nav">
-  <li><a href="#join-library">Join-library</a></li>
-  <li><a href="#sorter">Sorter</a>
+  <li><a href="#parsing-httpdnginx-access-logs">Parsing HTTPD/NGINX access 
logs.</a>
     <ul>
-      <li><a href="#caveats">Caveats</a></li>
-      <li><a href="#options">Options</a></li>
-      <li><a href="#example-usage-of-sortvalues">Example usage of <code 
class="highlighter-rouge">SortValues</code></a></li>
+      <li><a href="#summary">Summary</a></li>
+      <li><a href="#project-page">Project page</a></li>
+      <li><a href="#license">License</a></li>
+      <li><a href="#download">Download</a></li>
+      <li><a href="#code-example">Code example</a></li>
+    </ul>
+  </li>
+  <li><a href="#analyzing-the-useragent-string">Analyzing the Useragent 
string</a>
+    <ul>
+      <li><a href="#summary-1">Summary</a></li>
+      <li><a href="#project-page-1">Project page</a></li>
+      <li><a href="#license-1">License</a></li>
+      <li><a href="#download-1">Download</a></li>
+      <li><a href="#code-example-1">Code example</a></li>
     </ul>
   </li>
 </ul>
@@ -143,64 +154,108 @@
       </nav>
 
       <div class="body__contained body__section-nav">
-        <h1 id="apache-beam-java-sdk-extensions">Apache Beam Java SDK 
Extensions</h1>
+        <h1 id="apache-beam-3rd-party-java-extensions">Apache Beam 3rd Party 
Java Extensions</h1>
+
+<p>These are some of the 3rd party Java libraries that may be useful for 
specific applications.</p>
 
-<h2 id="join-library">Join-library</h2>
+<h2 id="parsing-httpdnginx-access-logs">Parsing HTTPD/NGINX access logs.</h2>
 
-<p>Join-library provides inner join, outer left join, and outer right join 
functions. The aim
-is to simplify the most common cases of join to a simple function call.</p>
+<h3 id="summary">Summary</h3>
+<p>The Apache HTTPD webserver creates logfiles that contain valuable 
information about the requests that have been made to
+the webserver. The format of these log files is a configuration option in the 
Apache HTTPD server, so parsing them
+into useful data elements is normally very hard to do.</p>
 
-<p>The functions are generic and support joins of any Beam-supported types.
-Input to the join functions are <code 
class="highlighter-rouge">PCollections</code> of <code 
class="highlighter-rouge">Key</code> / <code 
class="highlighter-rouge">Value</code>s. Both
-the left and right <code class="highlighter-rouge">PCollection</code>s need 
the same type for the key. All the join
-functions return a <code class="highlighter-rouge">Key</code> / <code 
class="highlighter-rouge">Value</code> where <code 
class="highlighter-rouge">Key</code> is the join key and value is
-a <code class="highlighter-rouge">Key</code> / <code 
class="highlighter-rouge">Value</code> where the key is the left value and 
right is the value.</p>
+<p>To solve this problem in an easy way, a library was created that works in 
combination with Apache Beam
+and is capable of doing this for both Apache HTTPD and NGINX.</p>
 
-<p>For outer joins, the user must provide a value that represents <code 
class="highlighter-rouge">null</code> because <code 
class="highlighter-rouge">null</code>
-cannot be serialized.</p>
+<p>The basic idea is that the logformat specification is the schema used to 
create the line. 
+This parser is simply initialized with this schema and the list of fields you 
want to extract.</p>
 
-<p>Example usage:</p>
+<h3 id="project-page">Project page</h3>
+<p><a 
href="https://github.com/nielsbasjes/logparser">https://github.com/nielsbasjes/logparser</a></p>
 
-<div class="highlighter-rouge"><pre 
class="highlight"><code>PCollection&lt;KV&lt;String, String&gt;&gt; 
leftPcollection = ...
-PCollection&lt;KV&lt;String, Long&gt;&gt; rightPcollection = ...
+<h3 id="license">License</h3>
+<p>Apache License 2.0</p>
 
-PCollection&lt;KV&lt;String, KV&lt;String, Long&gt;&gt;&gt; joinedPcollection =
-  Join.innerJoin(leftPcollection, rightPcollection);
+<h3 id="download">Download</h3>
+<div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
+  &lt;groupId&gt;nl.basjes.parse.httpdlog&lt;/groupId&gt;
+  &lt;artifactId&gt;httpdlog-parser&lt;/artifactId&gt;
+  &lt;version&gt;5.0&lt;/version&gt;
+&lt;/dependency&gt;
 </code></pre>
 </div>
 
-<h2 id="sorter">Sorter</h2>
-
-<p>This module provides the <code class="highlighter-rouge">SortValues</code> 
transform, which takes a <code 
class="highlighter-rouge">PCollection&lt;KV&lt;K, Iterable&lt;KV&lt;K2, 
V&gt;&gt;&gt;&gt;</code> and produces a <code 
class="highlighter-rouge">PCollection&lt;KV&lt;K, Iterable&lt;KV&lt;K2, 
V&gt;&gt;&gt;&gt;</code> where, for each primary key <code 
class="highlighter-rouge">K</code> the paired <code 
class="highlighter-rouge">Iterable&lt;KV&lt;K2, V&gt;&gt;</code> has been 
sorted b [...]
-
-<h3 id="caveats">Caveats</h3>
-
-<ul>
-  <li>This transform performs value-only sorting; the iterable accompanying 
each key is sorted, but <em>there is no relationship between different 
keys</em>, as Beam does not support any defined relationship between different 
elements in a <code class="highlighter-rouge">PCollection</code>.</li>
-  <li>Each <code class="highlighter-rouge">Iterable&lt;KV&lt;K2, 
V&gt;&gt;</code> is sorted on a single worker using local memory and disk. This 
means that <code class="highlighter-rouge">SortValues</code> may be a 
performance and/or scalability bottleneck when used in different pipelines. For 
example, users are discouraged from using <code 
class="highlighter-rouge">SortValues</code> on a <code 
class="highlighter-rouge">PCollection</code> of a single element to globally 
sort a large <cod [...]
-</ul>
+<h3 id="code-example">Code example</h3>
+
+<p>Assuming a WebEvent class that has the setters setIP, setQueryImg and 
setQueryStringValues</p>
+
+<div class="highlighter-rouge"><pre 
class="highlight"><code>PCollection&lt;WebEvent&gt; filledWebEvents = input
+  .apply("Extract Elements from logline",
+    ParDo.of(new DoFn&lt;String, WebEvent&gt;() {
+      private Parser&lt;WebEvent&gt; parser;
+
+      @Setup
+      public void setup() throws NoSuchMethodException {
+        parser = new HttpdLoglineParser&lt;&gt;(WebEvent.class, 
+            "%h %l %u %t \"%r\" %&gt;s %b \"%{Referer}i\" \"%{User-Agent}i\" 
\"%{Cookie}i\"");
+        parser.addParseTarget("setIP",                  
"IP:connection.client.host");
+        parser.addParseTarget("setQueryImg",            
"STRING:request.firstline.uri.query.img");
+        parser.addParseTarget("setQueryStringValues",   
"STRING:request.firstline.uri.query.*");
+      }
+
+      @ProcessElement
+      public void processElement(ProcessContext c) throws 
InvalidDissectorException, MissingDissectorsException, DissectionFailure {
+        c.output(parser.parse(c.element()));
+      }
+    })
+  );
+</code></pre>
+</div>
 
-<h3 id="options">Options</h3>
+<h2 id="analyzing-the-useragent-string">Analyzing the Useragent string</h2>
 
-<ul>
-  <li>The user can customize the temporary location used if sorting requires 
spilling to disk and the maximum amount of memory to use by creating a custom 
instance of <code 
class="highlighter-rouge">BufferedExternalSorter.Options</code> to pass into 
<code class="highlighter-rouge">SortValues.create</code>.</li>
-</ul>
+<h3 id="summary-1">Summary</h3>
+<p>Parse and analyze the useragent string and extract as many relevant 
attributes as possible.</p>
 
-<h3 id="example-usage-of-sortvalues">Example usage of <code 
class="highlighter-rouge">SortValues</code></h3>
+<h3 id="project-page-1">Project page</h3>
+<p><a 
href="https://github.com/nielsbasjes/yauaa">https://github.com/nielsbasjes/yauaa</a></p>
 
-<div class="highlighter-rouge"><pre 
class="highlight"><code>PCollection&lt;KV&lt;String, KV&lt;String, 
Integer&gt;&gt;&gt; input = ...
+<h3 id="license-1">License</h3>
+<p>Apache License 2.0</p>
 
-// Group by primary key, bringing &lt;SecondaryKey, Value&gt; pairs for the 
same key together.
-PCollection&lt;KV&lt;String, Iterable&lt;KV&lt;String, Integer&gt;&gt;&gt;&gt; 
grouped =
-    input.apply(GroupByKey.&lt;String, KV&lt;String, Integer&gt;&gt;create());
+<h3 id="download-1">Download</h3>
+<div class="highlighter-rouge"><pre class="highlight"><code>&lt;dependency&gt;
+  &lt;groupId&gt;nl.basjes.parse.useragent&lt;/groupId&gt;
+  &lt;artifactId&gt;yauaa-beam&lt;/artifactId&gt;
+  &lt;version&gt;4.2&lt;/version&gt;
+&lt;/dependency&gt;
+</code></pre>
+</div>
 
-// For every primary key, sort the iterable of &lt;SecondaryKey, Value&gt; 
pairs by secondary key.
-PCollection&lt;KV&lt;String, Iterable&lt;KV&lt;String, Integer&gt;&gt;&gt;&gt; 
groupedAndSorted =
-    grouped.apply(
-        SortValues.&lt;String, String, 
Integer&gt;create(BufferedExternalSorter.options()));
+<h3 id="code-example-1">Code example</h3>
+<div class="highlighter-rouge"><pre 
class="highlight"><code>PCollection&lt;WebEvent&gt; filledWebEvents = input
+    .apply("Extract Elements from Useragent",
+      ParDo.of(new UserAgentAnalysisDoFn&lt;WebEvent&gt;() {
+        @Override
+        public String getUserAgentString(WebEvent record) {
+          return record.useragent;
+        }
+
+        @YauaaField("DeviceClass")
+        public void setDC(WebEvent record, String value) {
+          record.deviceClass = value;
+        }
+
+        @YauaaField("AgentNameVersion")
+        public void setANV(WebEvent record, String value) {
+          record.agentNameVersion = value;
+        }
+    }));
 </code></pre>
 </div>
 
+
       </div>
     </div>
     <footer class="footer">
diff --git a/content/documentation/sdks/java/index.html 
b/content/documentation/sdks/java/index.html
index 8ee69fc..24bf79c 100644
--- a/content/documentation/sdks/java/index.html
+++ b/content/documentation/sdks/java/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
@@ -166,6 +167,8 @@
   <li><a href="/documentation/sdks/java/nexmark">Nexmark</a> is a benchmark 
suite that runs in batch and streaming modes.</li>
 </ul>
 
+<p>In addition, several <a href="/documentation/sdks/java-thirdparty/">3rd 
party Java libraries</a> exist.</p>
+
       </div>
     </div>
     <footer class="footer">
diff --git a/content/documentation/sdks/java/nexmark/index.html 
b/content/documentation/sdks/java/nexmark/index.html
index e490497..1db73a7 100644
--- a/content/documentation/sdks/java/nexmark/index.html
+++ b/content/documentation/sdks/java/nexmark/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
diff --git a/content/documentation/sdks/python-custom-io/index.html 
b/content/documentation/sdks/python-custom-io/index.html
index 947b33c..566e17d 100644
--- a/content/documentation/sdks/python-custom-io/index.html
+++ b/content/documentation/sdks/python-custom-io/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
diff --git a/content/documentation/sdks/python-pipeline-dependencies/index.html 
b/content/documentation/sdks/python-pipeline-dependencies/index.html
index 3afbcab..89a398b 100644
--- a/content/documentation/sdks/python-pipeline-dependencies/index.html
+++ b/content/documentation/sdks/python-pipeline-dependencies/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
diff --git a/content/documentation/sdks/python-type-safety/index.html 
b/content/documentation/sdks/python-type-safety/index.html
index ad63eac..06d4512 100644
--- a/content/documentation/sdks/python-type-safety/index.html
+++ b/content/documentation/sdks/python-type-safety/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>
diff --git a/content/documentation/sdks/python/index.html 
b/content/documentation/sdks/python/index.html
index 3b327da..65c5e8f 100644
--- a/content/documentation/sdks/python/index.html
+++ b/content/documentation/sdks/python/index.html
@@ -96,6 +96,7 @@
                                                                                
                                                    alt="External link."></a>
     </li>
     <li><a href="/documentation/sdks/java-extensions/">Java SDK 
extensions</a></li>
+    <li><a href="/documentation/sdks/java-thirdparty/">Java 3rd party 
extensions</a></li>
     <li><a href="/documentation/sdks/java/nexmark/">Nexmark benchmark 
suite</a></li>
   </ul>
 </li>

-- 
To stop receiving notification emails like this one, please contact
[email protected].

Reply via email to