Modified: incubator/samza/site/sitemap.xml URL: http://svn.apache.org/viewvc/incubator/samza/site/sitemap.xml?rev=1520461&r1=1520460&r2=1520461&view=diff ============================================================================== --- incubator/samza/site/sitemap.xml (original) +++ incubator/samza/site/sitemap.xml Fri Sep 6 00:12:53 2013 @@ -4,7 +4,7 @@ <url> <loc>http://samza.incubator.apache.org/</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> <changefreq>daily</changefreq> <priority>1.0</priority> </url> @@ -14,273 +14,273 @@ <url> <loc>http://samza.incubator.apache.org/community/committers.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/community/irc.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/community/mailing-lists.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/code.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/coding-guide.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/disclaimer.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/projects.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/rules.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/contribute/seps.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/index.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/api/overview.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/comparisons/introduction.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/comparisons/mupd8.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/comparisons/storm.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/checkpointing.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/event-loop.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/index.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/jmx.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/metrics.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/state-management.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/streams.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/task-runner.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/container/windowing.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/index.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/introduction/architecture.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/introduction/background.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/introduction/concepts.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/configuration.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/job-runner.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/logging.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/packaging.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/jobs/yarn-jobs.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/operations/kafka.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/operations/security.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/yarn/application-master.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/documentation/0.7.0/yarn/isolation.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/learn/tutorials/0.7.0/index.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/startup/download/index.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url> <url> <loc>http://samza.incubator.apache.org/startup/hello-samza/0.7.0/index.html</loc> - <lastmod>2013-09-02</lastmod> + <lastmod>2013-09-05</lastmod> </url>
Modified: incubator/samza/site/startup/download/index.html URL: http://svn.apache.org/viewvc/incubator/samza/site/startup/download/index.html?rev=1520461&r1=1520460&r2=1520461&view=diff ============================================================================== --- incubator/samza/site/startup/download/index.html (original) +++ incubator/samza/site/startup/download/index.html Fri Sep 6 00:12:53 2013 @@ -129,7 +129,7 @@ Snapshot builds are available in the Apa <h3>Checking out and Building</h3> <p>If you're interested in working on Samza, or building the JARs from scratch, then you'll need to checkout and build the code. Samza does not have a binary release at this time. To check out and build Samza, run these commands.</p> -<div class="highlight"><pre><code class="text language-text">git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git +<div class="highlight"><pre><code class="text language-text" data-lang="text">git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git cd incubator-samza ./gradlew clean build </code></pre></div> Modified: incubator/samza/site/startup/hello-samza/0.7.0/index.html URL: http://svn.apache.org/viewvc/incubator/samza/site/startup/hello-samza/0.7.0/index.html?rev=1520461&r1=1520460&r2=1520461&view=diff ============================================================================== --- incubator/samza/site/startup/hello-samza/0.7.0/index.html (original) +++ incubator/samza/site/startup/hello-samza/0.7.0/index.html Fri Sep 6 00:12:53 2013 @@ -72,19 +72,19 @@ <h3>Get the Code</h3> <p>You'll need to check out and publish Samza, since it's not available in a Maven repository right now.</p> -<div class="highlight"><pre><code class="text language-text">git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git +<div class="highlight"><pre><code class="text language-text" data-lang="text">git clone http://git-wip-us.apache.org/repos/asf/incubator-samza.git cd incubator-samza ./gradlew -PscalaVersion=2.8.1 clean publishToMavenLocal </code></pre></div> <p>Next, check out the hello-samza project.</p> -<div class="highlight"><pre><code class="text language-text">git clone git://github.com/linkedin/hello-samza.git +<div class="highlight"><pre><code class="text language-text" data-lang="text">git clone git://github.com/linkedin/hello-samza.git </code></pre></div> <p>This project contains everything you'll need to run your first Samza jobs.</p> <h3>Start a Grid</h3> <p>A Samza grid usually comprises three different systems: <a href="http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html">YARN</a>, <a href="http://kafka.apache.org/">Kafka</a>, and <a href="http://zookeeper.apache.org/">ZooKeeper</a>. The hello-samza project comes with a script called "grid" to help you setup these systems. Start by running:</p> -<div class="highlight"><pre><code class="text language-text">bin/grid +<div class="highlight"><pre><code class="text language-text" data-lang="text">bin/grid </code></pre></div> <p>This command will download, install, and start ZooKeeper, Kafka, and YARN. All package files will be put in a sub-directory called "deploy" inside hello-samza's root folder.</p> @@ -93,34 +93,34 @@ cd incubator-samza <h3>Build a Samza Job Package</h3> <p>Before you can run a Samza job, you need to build a package for it. This package is what YARN uses to deploy your jobs on the grid.</p> -<div class="highlight"><pre><code class="text language-text">mvn clean package +<div class="highlight"><pre><code class="text language-text" data-lang="text">mvn clean package mkdir -p deploy/samza tar -xvf ./samza-job-package/target/samza-job-package-0.7.0-dist.tar.gz -C deploy/samza </code></pre></div> <h3>Run a Samza Job</h3> <p>After you've built your Samza package, you can start a job on the grid using the run-job.sh script.</p> -<div class="highlight"><pre><code class="text language-text">deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-feed.properties +<div class="highlight"><pre><code class="text language-text" data-lang="text">deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-feed.properties </code></pre></div> <p>The job will consume a feed of real-time edits from Wikipedia, and produce them to a Kafka topic called "wikipedia-raw". Give the job a minute to startup, and then tail the Kafka topic:</p> -<div class="highlight"><pre><code class="text language-text">deploy/kafka/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic wikipedia-raw +<div class="highlight"><pre><code class="text language-text" data-lang="text">deploy/kafka/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic wikipedia-raw </code></pre></div> <p>Pretty neat, right? Now, check out the YARN UI again (<a href="http://localhost:8088">http://localhost:8088</a>). This time around, you'll see your Samza job is running!</p> <h3>Generate Wikipedia Statistics</h3> <p>Let's calculate some statistics based on the messages in the wikipedia-raw topic. Start two more jobs:</p> -<div class="highlight"><pre><code class="text language-text">deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-parser.properties +<div class="highlight"><pre><code class="text language-text" data-lang="text">deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-parser.properties deploy/samza/bin/run-job.sh --config-factory=org.apache.samza.config.factories.PropertiesConfigFactory --config-path=file://$PWD/deploy/samza/config/wikipedia-stats.properties </code></pre></div> <p>The first job (wikipedia-parser) parses the messages in wikipedia-raw, and extracts information about the size of the edit, who made the change, etc. You can take a look at its output with:</p> -<div class="highlight"><pre><code class="text language-text">deploy/kafka/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic wikipedia-edits +<div class="highlight"><pre><code class="text language-text" data-lang="text">deploy/kafka/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic wikipedia-edits </code></pre></div> <p>The last job (wikipedia-stats) reads messages from the wikipedia-edits topic, and calculates counts, every ten seconds, for all edits that were made during that window. It outputs these counts to the wikipedia-stats topic.</p> -<div class="highlight"><pre><code class="text language-text">deploy/kafka/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic wikipedia-stats +<div class="highlight"><pre><code class="text language-text" data-lang="text">deploy/kafka/bin/kafka-console-consumer.sh --zookeeper localhost:2181 --topic wikipedia-stats </code></pre></div> <p>The messages in the stats topic look like this:</p> -<div class="highlight"><pre><code class="text language-text">{"is-talk":2,"bytes-added":5276,"edits":13,"unique-titles":13} +<div class="highlight"><pre><code class="text language-text" data-lang="text">{"is-talk":2,"bytes-added":5276,"edits":13,"unique-titles":13} {"is-bot-edit":1,"is-talk":3,"bytes-added":4211,"edits":30,"unique-titles":30,"is-unpatrolled":1,"is-new":2,"is-minor":7} {"bytes-added":3180,"edits":19,"unique-titles":19,"is-unpatrolled":1,"is-new":1,"is-minor":3} {"bytes-added":2218,"edits":18,"unique-titles":18,"is-unpatrolled":2,"is-new":2,"is-minor":3} @@ -130,7 +130,7 @@ deploy/samza/bin/run-job.sh --config-fac <h3>Shutdown</h3> <p>After you're done, you can clean everything up using the same grid script.</p> -<div class="highlight"><pre><code class="text language-text">bin/grid stop yarn +<div class="highlight"><pre><code class="text language-text" data-lang="text">bin/grid stop yarn bin/grid stop kafka bin/grid stop zookeeper </code></pre></div>
