This is an automated email from the ASF dual-hosted git repository.

guoyp pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/griffin-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new d286af0  Updated asf-site site from master 
(3f07e1dafdfe10546b8b1e337690b060b839513e)
d286af0 is described below

commit d286af0c5046a2742ed4006062471744696f84a5
Author: William Guo <[email protected]>
AuthorDate: Mon Jan 21 21:41:17 2019 +0800

    Updated asf-site site from master (3f07e1dafdfe10546b8b1e337690b060b839513e)
---
 docs/community.html      |   2 +
 docs/conf.html           |   2 +
 docs/contribute.html     |   2 +
 docs/contributors.html   |   2 +
 docs/download.html       |   2 +
 docs/latest.html         |   2 +
 docs/profiling.html      |   2 +
 docs/quickstart-cn.html  | 606 +++++++++++++++++++++++++++++++++++++++++++++++
 docs/quickstart.html     |   2 +
 docs/usecases.html       |   2 +
 images/arch-1.png        | Bin 0 -> 307285 bytes
 images/dashboard-big.png | Bin 0 -> 170904 bytes
 images/project.jpg       | Bin 0 -> 59210 bytes
 13 files changed, 624 insertions(+)

diff --git a/docs/community.html b/docs/community.html
index 0b74114..25b9769 100644
--- a/docs/community.html
+++ b/docs/community.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/community.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/community.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/community.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/community.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/conf.html b/docs/conf.html
index 111dccc..d6950be 100644
--- a/docs/conf.html
+++ b/docs/conf.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/conf.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/conf.html" id="">Quick Start (Chinese Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/conf.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/conf.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/contribute.html b/docs/contribute.html
index 032b067..9cd2c88 100644
--- a/docs/contribute.html
+++ b/docs/contribute.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/contribute.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/contribute.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/contribute.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/contribute.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/contributors.html b/docs/contributors.html
index 84ed7e9..ee59c8e 100644
--- a/docs/contributors.html
+++ b/docs/contributors.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/contributors.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/contributors.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/contributors.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/contributors.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/download.html b/docs/download.html
index bda312b..c461921 100644
--- a/docs/download.html
+++ b/docs/download.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/download.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/download.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/download.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/download.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/latest.html b/docs/latest.html
index fe16d28..0149f1d 100644
--- a/docs/latest.html
+++ b/docs/latest.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/latest.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/latest.html" id="">Quick Start (Chinese Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/latest.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/latest.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/profiling.html b/docs/profiling.html
index f9bdfe1..86c18f9 100644
--- a/docs/profiling.html
+++ b/docs/profiling.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/profiling.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/profiling.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/profiling.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  current"><a href="/docs/profiling.html" 
data-permalink="/docs/profiling.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/quickstart-cn.html b/docs/quickstart-cn.html
new file mode 100644
index 0000000..435c5a7
--- /dev/null
+++ b/docs/quickstart-cn.html
@@ -0,0 +1,606 @@
+<!DOCTYPE html>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<head>
+       <meta charset="utf-8">
+       <meta http-equiv="X-UA-Compatible" content="IE=edge">
+
+       <title>Griffin - Quick Start</title>
+       <meta name="description" content="Apache Griffin - Big Data Quality 
Solution For Batch and Streaming">
+
+       <meta name="keywords" content="Griffin, Hadoop, Security, Real Time">
+       <meta name="author" content="eBay Inc.">
+
+       <meta charset="utf-8">
+       <meta name="viewport" content="initial-scale=1">
+
+       <link rel="stylesheet" href="/css/animate.css">
+       <link rel="stylesheet" href="/css/bootstrap.min.css">
+
+       <link rel="stylesheet" href="/css/font-awesome.min.css">
+
+       <link rel="stylesheet" href="/css/misc.css">
+       <link rel="stylesheet" href="/css/style.css">
+       <link rel="stylesheet" href="/css/styles.css">
+       <link rel="stylesheet" href="/css/main.css">
+       <link rel="alternate" type="application/rss+xml" title="Griffin" 
href="http://griffin.apache.org/feed.xml" />
+       <link rel="shortcut icon" href="/images/favicon.ico">
+
+       <!-- Baidu Analytics Tracking-->
+       <script>
+       var _hmt = _hmt || [];
+       (function() {
+         var hm = document.createElement("script");
+         hm.src = "//hm.baidu.com/hm.js?fedc55df2ea52777a679192e8f849ece";
+         var s = document.getElementsByTagName("script")[0];
+         s.parentNode.insertBefore(hm, s);
+       })();
+       </script>
+
+       <!-- Google Analytics Tracking -->
+       <script>
+         
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+         (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new 
Date();a=s.createElement(o),
+         
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+         
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+         ga('create', 'UA-68929805-1', 'auto');
+         ga('send', 'pageview');
+       </script>
+</head>
+
+<body>
+<!-- header start -->
+<div id="home_page">
+  <div class="topbar">
+    <div class="container">
+      <div class="row" >
+        <nav class="navbar navbar-default">
+          <div class="container-fluid">
+            <!-- Brand and toggle get grouped for better mobile display -->
+            <div class="navbar-header">
+              <button type="button" class="navbar-toggle collapsed" 
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> <span 
class="sr-only">Toggle navigation</span> <span class="icon-bar"></span> <span 
class="icon-bar"></span> <span class="icon-bar"></span> </button>
+              <a class="navbar-brand" href="/"><img src="/images/logo.png" 
height="44px" style="margin-top:-7px"></a> </div>
+            </div>
+          </div>
+          <!-- /.container-fluid -->
+        </nav>
+      </div>
+    </div>
+  </div>
+
+</div>
+<!-- header end -->
+<div class="container-fluid page-content">
+  <div class="row">
+    <div class="col-md-10 col-md-offset-1">
+      <!-- sidebar -->
+      <div class="col-xs-6 col-sm-3" id="sidebar" role="navigation">
+        <ul class="nav" id="adminnav">
+        
+        <li class="heading">Getting Started</li>
+        
+          <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/quickstart-cn.html" id="">Quick Start</a></li>
+        
+          <li class="sidenavli  current"><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/quickstart-cn.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
+          <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/quickstart-cn.html" id="">Streaming Use Cases</a></li>
+        
+          <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/quickstart-cn.html" id="">Profiling Use Cases</a></li>
+        
+          <li class="sidenavli  "><a href="/docs/community.html" 
data-permalink="/docs/quickstart-cn.html" id="">Community</a></li>
+        
+          <li class="sidenavli  "><a href="/docs/conf.html" 
data-permalink="/docs/quickstart-cn.html" id="">Conference</a></li>
+        
+        <li class="divider"></li>
+      
+        <li class="heading">Development</li>
+        
+          <li class="sidenavli  "><a href="/docs/contribute.html" 
data-permalink="/docs/quickstart-cn.html" id="">Contribution</a></li>
+        
+          <li class="sidenavli  "><a href="/docs/contributors.html" 
data-permalink="/docs/quickstart-cn.html" id="">Contributors</a></li>
+        
+        <li class="divider"></li>
+      
+        <li class="heading">Download</li>
+        
+          <li class="sidenavli  "><a href="/docs/latest.html" 
data-permalink="/docs/quickstart-cn.html" id="">Latest version</a></li>
+        
+          <li class="sidenavli  "><a href="/docs/download.html" 
data-permalink="/docs/quickstart-cn.html" id="">Archived</a></li>
+        
+        <li class="divider"></li>
+      
+        <li class="sidenavli">
+          <a href="mailto:[email protected]" target="_blank">Need 
Help?</a>
+        </li>
+        </ul>
+      </div>
+      <div class="col-xs-6 col-sm-9 page-main-content" style="margin-left: 
-15px" id="loadcontent">
+        <h1 class="page-header" style="margin-top: 0px">Quick Start</h1>
+        <h2 id="apache-griffin-入门指南">Apache Griffin 入门指南</h2>
+
+<p>数据质量模块是大数据平台中必不可少的一个功能组件,<a href="http://griffin.apache.org">Apache 
Griffin</a>(以下简称Griffin)是一个开源的大数据数据质量解决方案,它支持批处理和流模式两种数据质量检测方式,可以从不同维度(比如离线任务执行完毕后检查源端和目标端的数据数量是否一致、源表的数据空值数量等)度量数据资产,从而提升数据的准确度、可信度。</p>
+
+<p>在Griffin的架构中,主要分为Define、Measure和Analyze三个部分,如下图所示:</p>
+
+<p><img src="/images/arch-1.png" alt="arch" /></p>
+
+<p>各部分的职责如下:</p>
+
+<ul>
+  
<li>Define:主要负责定义数据质量统计的维度,比如数据质量统计的时间跨度、统计的目标(源端和目标端的数据数量是否一致,数据源里某一字段的非空的数量、不重复值的数量、最大值、最小值、top5的值数量等)</li>
+  <li>Measure:主要负责执行统计任务,生成统计结果</li>
+  <li>Analyze:主要负责保存与展示统计结果</li>
+</ul>
+
+<p>基于以上功能,我们大数据平台计划引入Griffin作为数据质量解决方案,实现数据一致性检查、空值统计等功能。以下是安装步骤总结:</p>
+
+<h3 id="安装部署">安装部署</h3>
+
+<h4 id="依赖准备">依赖准备</h4>
+
+<ul>
+  <li>JDK (1.8 or later versions)</li>
+  <li>MySQL(version 5.6及以上)</li>
+  <li>Hadoop (2.6.0 or later)</li>
+  <li>Hive (version 2.x)</li>
+  <li>Spark (version 2.2.1)</li>
+  <li>Livy(livy-0.5.0-incubating)</li>
+  <li>ElasticSearch (5.0 or later versions)</li>
+</ul>
+
+<h4 id="初始化">初始化</h4>
+
+<p>初始化操作具体请参考<a 
href="https://github.com/apache/griffin/blob/master/griffin-doc/deploy/deploy-guide.md">Apache
 Griffin Deployment 
Guide</a>,由于我的测试环境中Hadoop集群、Hive集群已搭好,故这里省略Hadoop、Hive安装步骤,只保留拷贝配置文件、配置Hadoop配置文件目录步骤。</p>
+
+<p>1、MySQL:</p>
+
+<p>在MySQL中创建数据库quartz,然后执行<a 
href="https://github.com/apache/griffin/blob/master/service/src/main/resources/Init_quartz_mysql_innodb.sql">Init_quartz_mysql_innodb.sql</a>脚本初始化表信息:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>mysql -u &lt;username&gt; -p &lt;password&gt; &lt; 
Init_quartz_mysql_innodb.sql
+</code></pre></div></div>
+
+<p>2、Hadoop和Hive:</p>
+
+<p>从Hadoop服务器拷贝配置文件到Livy服务器上,这里假设将配置文件放在/usr/data/conf目录下。</p>
+
+<p>在Hadoop服务器上创建/home/spark_conf目录,并将Hive的配置文件hive-site.xml上传到该目录下:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>#创建/home/spark_conf目录
+hadoop fs -mkdir -p /home/spark_conf
+#上传hive-site.xml
+hadoop fs -put hive-site.xml /home/spark_conf/
+</code></pre></div></div>
+
+<p>3、设置环境变量:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code><span class="c">#!/bin/bash</span>
+<span class="nb">export </span><span class="nv">JAVA_HOME</span><span 
class="o">=</span>/data/jdk1.8.0_192
+
+<span class="c">#spark目录</span>
+<span class="nb">export </span><span class="nv">SPARK_HOME</span><span 
class="o">=</span>/usr/data/spark-2.1.1-bin-2.6.3
+<span class="c">#livy命令目录</span>
+<span class="nb">export </span><span class="nv">LIVY_HOME</span><span 
class="o">=</span>/usr/data/livy/bin
+<span class="c">#hadoop配置文件目录</span>
+<span class="nb">export </span><span class="nv">HADOOP_CONF_DIR</span><span 
class="o">=</span>/usr/data/conf
+</code></pre></div></div>
+
+<p>4、Livy配置:</p>
+
+<p>更新livy/conf下的livy.conf配置文件:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>livy.server.host = 127.0.0.1
+livy.spark.master = yarn
+livy.spark.deployMode = cluster
+livy.repl.enable-hive-context = true
+</code></pre></div></div>
+
+<p>启动livy:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>livy-server start
+</code></pre></div></div>
+
+<p>5、Elasticsearch配置:</p>
+
+<p>在ES里创建griffin索引:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>curl -XPUT http://es:9200/griffin -d '
+{
+    "aliases": {},
+    "mappings": {
+        "accuracy": {
+            "properties": {
+                "name": {
+                    "fields": {
+                        "keyword": {
+                            "ignore_above": 256,
+                            "type": "keyword"
+                        }
+                    },
+                    "type": "text"
+                },
+                "tmst": {
+                    "type": "date"
+                }
+            }
+        }
+    },
+    "settings": {
+        "index": {
+            "number_of_replicas": "2",
+            "number_of_shards": "5"
+        }
+    }
+}
+'
+</code></pre></div></div>
+
+<h4 id="源码打包部署">源码打包部署</h4>
+
+<p>在这里我使用源码编译打包的方式来部署Griffin,Griffin的源码地址是:<a 
href="https://github.com/apache/griffin.git">https://github.com/apache/griffin.git</a>,这里我使用的源码tag是griffin-0.4.0,下载完成在idea中导入并展开源码的结构图如下:</p>
+
+<p><img src="/images/project.jpg" alt="project" /></p>
+
+<p>Griffin的源码结构很清晰,主要包括griffin-doc、measure、service和ui四个模块,其中griffin-doc负责存放Griffin的文档,measure负责与spark交互,执行统计任务,service使用spring
 boot作为服务实现,负责给ui模块提供交互所需的restful api,保存统计任务,展示统计结果。</p>
+
+<p>源码导入构建完毕后,需要修改配置文件,具体修改的配置文件如下:</p>
+
+<p>1、service/src/main/resources/application.properties:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code># Apache Griffin应用名称
+spring.application.name=griffin_service
+# MySQL数据库配置信息
+spring.datasource.url=jdbc:mysql://10.104.20.126:3306/griffin_quartz?useSSL=false
+spring.datasource.username=xnuser
+spring.datasource.password=Xn20!@n0oLk
+spring.jpa.generate-ddl=true
+spring.datasource.driver-class-name=com.mysql.jdbc.Driver
+spring.jpa.show-sql=true
+# Hive metastore配置信息
+hive.metastore.uris=thrift://namenodetest01.bi:9083
+hive.metastore.dbname=default
+hive.hmshandler.retry.attempts=15
+hive.hmshandler.retry.interval=2000ms
+# Hive cache time
+cache.evict.hive.fixedRate.in.milliseconds=900000
+# Kafka schema registry,按需配置
+kafka.schema.registry.url=http://namenodetest01.bi:8081
+# Update job instance state at regular intervals
+jobInstance.fixedDelay.in.milliseconds=60000
+# Expired time of job instance which is 7 days that is 604800000 
milliseconds.Time unit only supports milliseconds
+jobInstance.expired.milliseconds=604800000
+# schedule predicate job every 5 minutes and repeat 12 times at most
+#interval time unit s:second m:minute h:hour d:day,only support these four 
units
+predicate.job.interval=5m
+predicate.job.repeat.count=12
+# external properties directory location
+external.config.location=
+# external BATCH or STREAMING env
+external.env.location=
+# login strategy ("default" or "ldap")
+login.strategy=default
+# ldap,登录策略为ldap时配置
+ldap.url=ldap://hostname:port
+ldap.email=@example.com
+ldap.searchBase=DC=org,DC=example
+ldap.searchPattern=(sAMAccountName={0})
+# hdfs default name
+fs.defaultFS=
+# elasticsearch配置
+elasticsearch.host=griffindq02-test1-rgtj1-tj1
+elasticsearch.port=9200
+elasticsearch.scheme=http
+# elasticsearch.user = user
+# elasticsearch.password = password
+# livy配置
+livy.uri=http://10.104.110.116:8998/batches
+# yarn url配置
+yarn.uri=http://10.104.110.116:8088
+# griffin event listener
+internal.event.listeners=GriffinJobEventHook
+</code></pre></div></div>
+
+<p>2、service/src/main/resources/quartz.properties</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+org.quartz.scheduler.instanceName=spring-boot-quartz
+org.quartz.scheduler.instanceId=AUTO
+org.quartz.threadPool.threadCount=5
+org.quartz.jobStore.class=org.quartz.impl.jdbcjobstore.JobStoreTX
+# If you use postgresql as your database,set this property value to 
org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
+# If you use mysql as your database,set this property value to 
org.quartz.impl.jdbcjobstore.StdJDBCDelegate
+# If you use h2 as your database, it's ok to set this property value to 
StdJDBCDelegate, PostgreSQLDelegate or others
+org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.StdJDBCDelegate
+org.quartz.jobStore.useProperties=true
+org.quartz.jobStore.misfireThreshold=60000
+org.quartz.jobStore.tablePrefix=QRTZ_
+org.quartz.jobStore.isClustered=true
+org.quartz.jobStore.clusterCheckinInterval=20000
+</code></pre></div></div>
+
+<p>3、service/src/main/resources/sparkProperties.json:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>{
+  "file": "hdfs:///griffin/griffin-measure.jar",
+  "className": "org.apache.griffin.measure.Application",
+  "name": "griffin",
+  "queue": "default",
+  "numExecutors": 2,
+  "executorCores": 1,
+  "driverMemory": "1g",
+  "executorMemory": "1g",
+  "conf": {
+    "spark.yarn.dist.files": "hdfs:///home/spark_conf/hive-site.xml"
+  },
+  "files": [
+  ]
+}
+</code></pre></div></div>
+
+<p>4、service/src/main/resources/env/env_batch.json:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>{
+  "spark": {
+    "log.level": "INFO"
+  },
+  "sinks": [
+    {
+      "type": "CONSOLE",
+      "config": {
+        "max.log.lines": 10
+      }
+    },
+    {
+      "type": "HDFS",
+      "config": {
+        "path": "hdfs://namenodetest01.bi.10101111.com:9001/griffin/persist",
+        "max.persist.lines": 10000,
+        "max.lines.per.file": 10000
+      }
+    },
+    {
+      "type": "ELASTICSEARCH",
+      "config": {
+        "method": "post",
+        "api": "http://10.104.110.119:9200/griffin/accuracy",
+        "connection.timeout": "1m",
+        "retry": 10
+      }
+    }
+  ],
+  "griffin.checkpoint": []
+}
+</code></pre></div></div>
+
+<p>配置文件修改好后,在idea里的terminal里执行如下maven命令进行编译打包:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>mvn -Dmaven.test.skip=true clean install
+</code></pre></div></div>
+
+<p>命令执行完成后,会在service和measure模块的target目录下分别看到service-0.4.0.jar和measure-0.4.0.jar两个jar,将这两个jar分别拷贝到服务器目录下。这两个jar的使用方式如下:</p>
+
+<p>1、使用如下命令将measure-0.4.0.jar这个jar上传到HDFS的/griffin文件目录里:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>#改变jar名称
+mv measure-0.4.0.jar griffin-measure.jar
+#上传griffin-measure.jar到HDFS文件目录里
+hadoop fs -put griffin-measure.jar /griffin/
+</code></pre></div></div>
+
+<p>这样做的目的主要是因为spark在yarn集群上执行任务时,需要到HDFS的/griffin目录下加载griffin-measure.jar,避免发生类org.apache.griffin.measure.Application找不到的错误。</p>
+
+<p>2、运行service-0.4.0.jar,启动Griffin管理后台:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>nohup java -jar service-0.4.0.jar&gt;service.out 
2&gt;&amp;1 &amp;
+</code></pre></div></div>
+
+<p>几秒钟后,我们可以访问Apache Griffin的默认UI(默认情况下,spring boot的端口是8080)。</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>http://IP:8080
+</code></pre></div></div>
+
+<p>UI操作文档链接:<a 
href="https://github.com/apache/griffin/blob/master/griffin-doc/ui/user-guide.md">Apache
 Griffin User Guide</a>。通过UI操作界面,我们可以创建自己的统计任务,部分结果展示界面如下:</p>
+
+<p><img src="/images/dashboard-big.png" alt="dashboard" /></p>
+
+<h4 id="功能体验">功能体验</h4>
+
+<p>1、在hive里创建表demo_src和demo_tgt:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>--create hive tables here. hql script
+--Note: replace hdfs location with your own path
+CREATE EXTERNAL TABLE `demo_src`(
+  `id` bigint,
+  `age` int,
+  `desc` string) 
+PARTITIONED BY (
+  `dt` string,
+  `hour` string)
+ROW FORMAT DELIMITED
+  FIELDS TERMINATED BY '|'
+LOCATION
+  'hdfs:///griffin/data/batch/demo_src';
+
+--Note: replace hdfs location with your own path
+CREATE EXTERNAL TABLE `demo_tgt`(
+  `id` bigint,
+  `age` int,
+  `desc` string) 
+PARTITIONED BY (
+  `dt` string,
+  `hour` string)
+ROW FORMAT DELIMITED
+  FIELDS TERMINATED BY '|'
+LOCATION
+  'hdfs:///griffin/data/batch/demo_tgt';
+</code></pre></div></div>
+
+<p>2、生成测试数据:</p>
+
+<p>从<a 
href="http://griffin.apache.org/data/batch/">http://griffin.apache.org/data/batch/</a>地址下载所有文件到Hadoop服务器上,然后使用如下命令执行gen-hive-data.sh脚本:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code>nohup ./gen-hive-data.sh&gt;gen.out 2&gt;&amp;1 &amp;
+</code></pre></div></div>
+
+<p>注意观察gen.out日志文件,如果有错误,视情况进行调整。这里我的测试环境Hadoop和Hive安装在同一台服务器上,因此直接运行脚本。</p>
+
+<p>3、通过UI界面创建统计任务,具体按照<a 
href="https://github.com/apache/griffin/blob/master/griffin-doc/ui/user-guide.md">Apache
 Griffin User Guide</a>
+一步步操作。</p>
+
+<h3 id="踩坑过程">踩坑过程</h3>
+
+<p>1、gen-hive-data.sh脚本生成数据失败,报no such file or directory错误。</p>
+
+<p>错误原因:HDFS中的/griffin/data/batch/demo_src/和/griffin/data/batch/demo_tgt/目录下”dt=时间”目录不存在,如dt=20190113。</p>
+
+<p>解决办法:给脚本中增加hadoop fs -mkdir创建目录操作,修改完后如下:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre 
class="highlight"><code><span class="c">#!/bin/bash</span>
+
+<span class="c">#create table</span>
+hive <span class="nt">-f</span> create-table.hql
+<span class="nb">echo</span> <span class="s2">"create table done"</span>
+
+<span class="c">#current hour</span>
+<span class="nb">sudo</span> ./gen_demo_data.sh
+<span class="nv">cur_date</span><span class="o">=</span><span 
class="sb">`</span><span class="nb">date</span> +%Y%m%d%H<span 
class="sb">`</span>
+<span class="nv">dt</span><span class="o">=</span><span 
class="k">${</span><span class="nv">cur_date</span>:0:8<span class="k">}</span>
+<span class="nv">hour</span><span class="o">=</span><span 
class="k">${</span><span class="nv">cur_date</span>:8:2<span class="k">}</span>
+<span class="nv">partition_date</span><span class="o">=</span><span 
class="s2">"dt='</span><span class="nv">$dt</span><span 
class="s2">',hour='</span><span class="nv">$hour</span><span 
class="s2">'"</span>
+<span class="nb">sed </span>s/PARTITION_DATE/<span 
class="nv">$partition_date</span>/ ./insert-data.hql.template <span 
class="o">&gt;</span> insert-data.hql
+hive <span class="nt">-f</span> insert-data.hql
+<span class="nv">src_done_path</span><span 
class="o">=</span>/griffin/data/batch/demo_src/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>/_DONE
+<span class="nv">tgt_done_path</span><span 
class="o">=</span>/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>/_DONE
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span> 
/griffin/data/batch/demo_src/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span> 
/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span 
class="nv">src_done_path</span><span class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span 
class="nv">tgt_done_path</span><span class="k">}</span>
+<span class="nb">echo</span> <span class="s2">"insert data [</span><span 
class="nv">$partition_date</span><span class="s2">] done"</span>
+
+<span class="c">#last hour</span>
+<span class="nb">sudo</span> ./gen_demo_data.sh
+<span class="nv">cur_date</span><span class="o">=</span><span 
class="sb">`</span><span class="nb">date</span> <span class="nt">-d</span> 
<span class="s1">'1 hour ago'</span> +%Y%m%d%H<span class="sb">`</span>
+<span class="nv">dt</span><span class="o">=</span><span 
class="k">${</span><span class="nv">cur_date</span>:0:8<span class="k">}</span>
+<span class="nv">hour</span><span class="o">=</span><span 
class="k">${</span><span class="nv">cur_date</span>:8:2<span class="k">}</span>
+<span class="nv">partition_date</span><span class="o">=</span><span 
class="s2">"dt='</span><span class="nv">$dt</span><span 
class="s2">',hour='</span><span class="nv">$hour</span><span 
class="s2">'"</span>
+<span class="nb">sed </span>s/PARTITION_DATE/<span 
class="nv">$partition_date</span>/ ./insert-data.hql.template <span 
class="o">&gt;</span> insert-data.hql
+hive <span class="nt">-f</span> insert-data.hql
+<span class="nv">src_done_path</span><span 
class="o">=</span>/griffin/data/batch/demo_src/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>/_DONE
+<span class="nv">tgt_done_path</span><span 
class="o">=</span>/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>/_DONE
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span> 
/griffin/data/batch/demo_src/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span> 
/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span 
class="nv">src_done_path</span><span class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span 
class="nv">tgt_done_path</span><span class="k">}</span>
+<span class="nb">echo</span> <span class="s2">"insert data [</span><span 
class="nv">$partition_date</span><span class="s2">] done"</span>
+
+<span class="c">#next hours</span>
+<span class="nb">set</span> +e
+<span class="k">while </span><span class="nb">true
+</span><span class="k">do
+  </span><span class="nb">sudo</span> ./gen_demo_data.sh
+  <span class="nv">cur_date</span><span class="o">=</span><span 
class="sb">`</span><span class="nb">date</span> +%Y%m%d%H<span 
class="sb">`</span>
+  <span class="nv">next_date</span><span class="o">=</span><span 
class="sb">`</span><span class="nb">date</span> <span class="nt">-d</span> 
<span class="s2">"+1hour"</span> <span class="s1">'+%Y%m%d%H'</span><span 
class="sb">`</span>
+  <span class="nv">dt</span><span class="o">=</span><span 
class="k">${</span><span class="nv">next_date</span>:0:8<span class="k">}</span>
+  <span class="nv">hour</span><span class="o">=</span><span 
class="k">${</span><span class="nv">next_date</span>:8:2<span class="k">}</span>
+  <span class="nv">partition_date</span><span class="o">=</span><span 
class="s2">"dt='</span><span class="nv">$dt</span><span 
class="s2">',hour='</span><span class="nv">$hour</span><span 
class="s2">'"</span>
+  <span class="nb">sed </span>s/PARTITION_DATE/<span 
class="nv">$partition_date</span>/ ./insert-data.hql.template <span 
class="o">&gt;</span> insert-data.hql
+  hive <span class="nt">-f</span> insert-data.hql
+  <span class="nv">src_done_path</span><span 
class="o">=</span>/griffin/data/batch/demo_src/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>/_DONE
+  <span class="nv">tgt_done_path</span><span 
class="o">=</span>/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>/_DONE
+  hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span> 
/griffin/data/batch/demo_src/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>
+  hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span> 
/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span 
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span 
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span 
class="k">}</span>
+  hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span 
class="nv">src_done_path</span><span class="k">}</span>
+  hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span 
class="nv">tgt_done_path</span><span class="k">}</span>
+  <span class="nb">echo</span> <span class="s2">"insert data [</span><span 
class="nv">$partition_date</span><span class="s2">] done"</span>
+  <span class="nb">sleep </span>3600
+<span class="k">done
+</span><span class="nb">set</span> <span class="nt">-e</span>
+</code></pre></div></div>
+
+<p>2、HDFS的/griffin/persist目录下没有统计结果文件,检查该目录的权限,设置合适的权限即可。</p>
+
+<p>3、ES中的metric数据为空,有两种可能:</p>
+
+<ul>
+  <li>service/src/main/resources/env/env_batch.json里的ES配置信息不正确</li>
+  <li>执行spark任务的yarn服务器上没有配置ES服务器的hostname,连接异常</li>
+</ul>
+
+<p>4、启动service-0.4.0.jar之后,访问不到UI界面,查看启动日志无异常。检查打包时是不是执行的mvn 
package命令,将该命令替换成mvn -Dmaven.test.skip=true clean install命令重新打包启动即可。</p>
+
+
+      </div><!--end of loadcontent-->
+    </div>
+    <!--end of centered content-->
+  </div>
+</div>
+<!--end of container-->
+
+
+<!-- footer start -->
+<div class="footerwrapper">
+    <div class="container">
+        <div class="row">
+            <div class="col-md-3">
+                <img src="/images/incubator_feather_egg_logo.png" height="60">
+            </div>
+            <div class="col-md-9">
+                <div style="margin-left:auto; margin-right:auto; 
text-align:center;font-size:12px;">
+                    <div>
+                        Apache Griffin is an effort undergoing incubation at 
The Apache Software Foundation (ASF), sponsored by the Apache Incubator. 
Incubation is required of all newly accepted projects until a further review 
indicates that the infrastructure, communications, and decision making process 
have stabilized in a manner consistent with other successful ASF projects. 
While incubation status is not necessarily a reflection of the completeness or 
stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
+                    </div>
+                </div>
+            </div>
+        </div>
+        <div class="row" style="padding-top:10px;">
+            Copyright © 2018 The Apache Software Foundation, Licensed under 
the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License, 
Version 2.0</a>.<br>
+                       Apache Griffin, Griffin, Apache, the Apache feather 
logo and the Apache Griffin logo are trademarks of The Apache Software 
Foundation.
+        </div>
+               <div class="row text-center" style="padding-top:10px;">
+                       <a 
href="https://www.apache.org/events/current-event.html">
+                               <img 
src="https://www.apache.org/events/current-event-234x60.png" alt="ASF Current 
Event">
+                       </a>
+               </div>
+    </div>
+</div>
+<!-- footer end -->
+
+<!-- JavaScripts -->
+<script src="https://code.jquery.com/jquery-2.2.4.min.js"></script>
+
+
+
+</body>
+</html>
diff --git a/docs/quickstart.html b/docs/quickstart.html
index 512b63d..dba69d5 100644
--- a/docs/quickstart.html
+++ b/docs/quickstart.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  current"><a href="/docs/quickstart.html" 
data-permalink="/docs/quickstart.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/quickstart.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
           <li class="sidenavli  "><a href="/docs/usecases.html" 
data-permalink="/docs/quickstart.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/quickstart.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/usecases.html b/docs/usecases.html
index 7cdc993..4d186b2 100644
--- a/docs/usecases.html
+++ b/docs/usecases.html
@@ -97,6 +97,8 @@ under the License.
         
           <li class="sidenavli  "><a href="/docs/quickstart.html" 
data-permalink="/docs/usecases.html" id="">Quick Start</a></li>
         
+          <li class="sidenavli  "><a href="/docs/quickstart-cn.html" 
data-permalink="/docs/usecases.html" id="">Quick Start (Chinese 
Version)</a></li>
+        
           <li class="sidenavli  current"><a href="/docs/usecases.html" 
data-permalink="/docs/usecases.html" id="">Streaming Use Cases</a></li>
         
           <li class="sidenavli  "><a href="/docs/profiling.html" 
data-permalink="/docs/usecases.html" id="">Profiling Use Cases</a></li>
diff --git a/images/arch-1.png b/images/arch-1.png
new file mode 100644
index 0000000..93bc755
Binary files /dev/null and b/images/arch-1.png differ
diff --git a/images/dashboard-big.png b/images/dashboard-big.png
new file mode 100644
index 0000000..aa796b6
Binary files /dev/null and b/images/dashboard-big.png differ
diff --git a/images/project.jpg b/images/project.jpg
new file mode 100644
index 0000000..6f446f2
Binary files /dev/null and b/images/project.jpg differ

Reply via email to