This is an automated email from the ASF dual-hosted git repository.
guoyp pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/griffin-site.git
The following commit(s) were added to refs/heads/asf-site by this push:
new d286af0 Updated asf-site site from master
(3f07e1dafdfe10546b8b1e337690b060b839513e)
d286af0 is described below
commit d286af0c5046a2742ed4006062471744696f84a5
Author: William Guo <[email protected]>
AuthorDate: Mon Jan 21 21:41:17 2019 +0800
Updated asf-site site from master (3f07e1dafdfe10546b8b1e337690b060b839513e)
---
docs/community.html | 2 +
docs/conf.html | 2 +
docs/contribute.html | 2 +
docs/contributors.html | 2 +
docs/download.html | 2 +
docs/latest.html | 2 +
docs/profiling.html | 2 +
docs/quickstart-cn.html | 606 +++++++++++++++++++++++++++++++++++++++++++++++
docs/quickstart.html | 2 +
docs/usecases.html | 2 +
images/arch-1.png | Bin 0 -> 307285 bytes
images/dashboard-big.png | Bin 0 -> 170904 bytes
images/project.jpg | Bin 0 -> 59210 bytes
13 files changed, 624 insertions(+)
diff --git a/docs/community.html b/docs/community.html
index 0b74114..25b9769 100644
--- a/docs/community.html
+++ b/docs/community.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/community.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/community.html" id="">Quick Start (Chinese
Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/community.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/community.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/conf.html b/docs/conf.html
index 111dccc..d6950be 100644
--- a/docs/conf.html
+++ b/docs/conf.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/conf.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/conf.html" id="">Quick Start (Chinese Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/conf.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/conf.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/contribute.html b/docs/contribute.html
index 032b067..9cd2c88 100644
--- a/docs/contribute.html
+++ b/docs/contribute.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/contribute.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/contribute.html" id="">Quick Start (Chinese
Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/contribute.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/contribute.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/contributors.html b/docs/contributors.html
index 84ed7e9..ee59c8e 100644
--- a/docs/contributors.html
+++ b/docs/contributors.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/contributors.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/contributors.html" id="">Quick Start (Chinese
Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/contributors.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/contributors.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/download.html b/docs/download.html
index bda312b..c461921 100644
--- a/docs/download.html
+++ b/docs/download.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/download.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/download.html" id="">Quick Start (Chinese
Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/download.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/download.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/latest.html b/docs/latest.html
index fe16d28..0149f1d 100644
--- a/docs/latest.html
+++ b/docs/latest.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/latest.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/latest.html" id="">Quick Start (Chinese Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/latest.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/latest.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/profiling.html b/docs/profiling.html
index f9bdfe1..86c18f9 100644
--- a/docs/profiling.html
+++ b/docs/profiling.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/profiling.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/profiling.html" id="">Quick Start (Chinese
Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/profiling.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli current"><a href="/docs/profiling.html"
data-permalink="/docs/profiling.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/quickstart-cn.html b/docs/quickstart-cn.html
new file mode 100644
index 0000000..435c5a7
--- /dev/null
+++ b/docs/quickstart-cn.html
@@ -0,0 +1,606 @@
+<!DOCTYPE html>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<head>
+ <meta charset="utf-8">
+ <meta http-equiv="X-UA-Compatible" content="IE=edge">
+
+ <title>Griffin - Quick Start</title>
+ <meta name="description" content="Apache Griffin - Big Data Quality
Solution For Batch and Streaming">
+
+ <meta name="keywords" content="Griffin, Hadoop, Security, Real Time">
+ <meta name="author" content="eBay Inc.">
+
+ <meta charset="utf-8">
+ <meta name="viewport" content="initial-scale=1">
+
+ <link rel="stylesheet" href="/css/animate.css">
+ <link rel="stylesheet" href="/css/bootstrap.min.css">
+
+ <link rel="stylesheet" href="/css/font-awesome.min.css">
+
+ <link rel="stylesheet" href="/css/misc.css">
+ <link rel="stylesheet" href="/css/style.css">
+ <link rel="stylesheet" href="/css/styles.css">
+ <link rel="stylesheet" href="/css/main.css">
+ <link rel="alternate" type="application/rss+xml" title="Griffin"
href="http://griffin.apache.org/feed.xml" />
+ <link rel="shortcut icon" href="/images/favicon.ico">
+
+ <!-- Baidu Analytics Tracking-->
+ <script>
+ var _hmt = _hmt || [];
+ (function() {
+ var hm = document.createElement("script");
+ hm.src = "//hm.baidu.com/hm.js?fedc55df2ea52777a679192e8f849ece";
+ var s = document.getElementsByTagName("script")[0];
+ s.parentNode.insertBefore(hm, s);
+ })();
+ </script>
+
+ <!-- Google Analytics Tracking -->
+ <script>
+
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+ (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new
Date();a=s.createElement(o),
+
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+ ga('create', 'UA-68929805-1', 'auto');
+ ga('send', 'pageview');
+ </script>
+</head>
+
+<body>
+<!-- header start -->
+<div id="home_page">
+ <div class="topbar">
+ <div class="container">
+ <div class="row" >
+ <nav class="navbar navbar-default">
+ <div class="container-fluid">
+ <!-- Brand and toggle get grouped for better mobile display -->
+ <div class="navbar-header">
+ <button type="button" class="navbar-toggle collapsed"
data-toggle="collapse" data-target="#bs-example-navbar-collapse-1"> <span
class="sr-only">Toggle navigation</span> <span class="icon-bar"></span> <span
class="icon-bar"></span> <span class="icon-bar"></span> </button>
+ <a class="navbar-brand" href="/"><img src="/images/logo.png"
height="44" alt="Apache Griffin" style="margin-top:-7px"></a> </div>
+ </div>
+ </div>
+ <!-- /.container-fluid -->
+ </nav>
+ </div>
+ </div>
+ </div>
+
+</div>
+<!-- header end -->
+<div class="container-fluid page-content">
+ <div class="row">
+ <div class="col-md-10 col-md-offset-1">
+ <!-- sidebar -->
+ <div class="col-xs-6 col-sm-3" id="sidebar" role="navigation">
+ <ul class="nav" id="adminnav">
+
+ <li class="heading">Getting Started</li>
+
+ <li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/quickstart-cn.html" id="">Quick Start</a></li>
+
+ <li class="sidenavli current"><a href="/docs/quickstart-cn.html"
data-permalink="/docs/quickstart-cn.html" id="">Quick Start (Chinese
Version)</a></li>
+
+ <li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/quickstart-cn.html" id="">Streaming Use Cases</a></li>
+
+ <li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/quickstart-cn.html" id="">Profiling Use Cases</a></li>
+
+ <li class="sidenavli "><a href="/docs/community.html"
data-permalink="/docs/quickstart-cn.html" id="">Community</a></li>
+
+ <li class="sidenavli "><a href="/docs/conf.html"
data-permalink="/docs/quickstart-cn.html" id="">Conference</a></li>
+
+ <li class="divider"></li>
+
+ <li class="heading">Development</li>
+
+ <li class="sidenavli "><a href="/docs/contribute.html"
data-permalink="/docs/quickstart-cn.html" id="">Contribution</a></li>
+
+ <li class="sidenavli "><a href="/docs/contributors.html"
data-permalink="/docs/quickstart-cn.html" id="">Contributors</a></li>
+
+ <li class="divider"></li>
+
+ <li class="heading">Download</li>
+
+ <li class="sidenavli "><a href="/docs/latest.html"
data-permalink="/docs/quickstart-cn.html" id="">Latest version</a></li>
+
+ <li class="sidenavli "><a href="/docs/download.html"
data-permalink="/docs/quickstart-cn.html" id="">Archived</a></li>
+
+ <li class="divider"></li>
+
+ <li class="sidenavli">
+ <a href="mailto:[email protected]" target="_blank">Need
Help?</a>
+ </li>
+ </ul>
+ </div>
+ <div class="col-xs-6 col-sm-9 page-main-content" style="margin-left:
-15px" id="loadcontent">
+ <h1 class="page-header" style="margin-top: 0px">Quick Start</h1>
+ <h2 id="apache-griffin-入门指南">Apache Griffin 入门指南</h2>
+
+<p>数据质量模块是大数据平台中必不可少的一个功能组件,<a href="http://griffin.apache.org">Apache
Griffin</a>(以下简称Griffin)是一个开源的大数据数据质量解决方案,它支持批处理和流模式两种数据质量检测方式,可以从不同维度(比如离线任务执行完毕后检查源端和目标端的数据数量是否一致、源表的数据空值数量等)度量数据资产,从而提升数据的准确度、可信度。</p>
+
+<p>在Griffin的架构中,主要分为Define、Measure和Analyze三个部分,如下图所示:</p>
+
+<p><img src="/images/arch-1.png" alt="arch" /></p>
+
+<p>各部分的职责如下:</p>
+
+<ul>
+
<li>Define:主要负责定义数据质量统计的维度,比如数据质量统计的时间跨度、统计的目标(源端和目标端的数据数量是否一致,数据源里某一字段的非空的数量、不重复值的数量、最大值、最小值、top5的值数量等)</li>
+ <li>Measure:主要负责执行统计任务,生成统计结果</li>
+ <li>Analyze:主要负责保存与展示统计结果</li>
+</ul>
+
+<p>基于以上功能,我们大数据平台计划引入Griffin作为数据质量解决方案,实现数据一致性检查、空值统计等功能。以下是安装步骤总结:</p>
+
+<h3 id="安装部署">安装部署</h3>
+
+<h4 id="依赖准备">依赖准备</h4>
+
+<ul>
+ <li>JDK (1.8 or later versions)</li>
+ <li>MySQL(version 5.6及以上)</li>
+ <li>Hadoop (2.6.0 or later)</li>
+ <li>Hive (version 2.x)</li>
+ <li>Spark (version 2.2.1)</li>
+ <li>Livy(livy-0.5.0-incubating)</li>
+ <li>ElasticSearch (5.0 or later versions)</li>
+</ul>
+
+<h4 id="初始化">初始化</h4>
+
+<p>初始化操作具体请参考<a
href="https://github.com/apache/griffin/blob/master/griffin-doc/deploy/deploy-guide.md">Apache
Griffin Deployment
Guide</a>,由于我的测试环境中Hadoop集群、Hive集群已搭好,故这里省略Hadoop、Hive安装步骤,只保留拷贝配置文件、配置Hadoop配置文件目录步骤。</p>
+
+<p>1、MySQL:</p>
+
+<p>在MySQL中创建数据库quartz,然后执行<a
href="https://github.com/apache/griffin/blob/master/service/src/main/resources/Init_quartz_mysql_innodb.sql">Init_quartz_mysql_innodb.sql</a>脚本初始化表信息:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>mysql -u &lt;username&gt; -p &lt;password&gt; &lt;
Init_quartz_mysql_innodb.sql
+</code></pre></div></div>
+
+<p>2、Hadoop和Hive:</p>
+
+<p>从Hadoop服务器拷贝配置文件到Livy服务器上,这里假设将配置文件放在/usr/data/conf目录下。</p>
+
+<p>在Hadoop服务器上创建/home/spark_conf目录,并将Hive的配置文件hive-site.xml上传到该目录下:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>#创建/home/spark_conf目录
+hadoop fs -mkdir -p /home/spark_conf
+#上传hive-site.xml
+hadoop fs -put hive-site.xml /home/spark_conf/
+</code></pre></div></div>
+
+<p>3、设置环境变量:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code><span class="c">#!/bin/bash</span>
+<span class="nb">export </span><span class="nv">JAVA_HOME</span><span
class="o">=</span>/data/jdk1.8.0_192
+
+<span class="c">#spark目录</span>
+<span class="nb">export </span><span class="nv">SPARK_HOME</span><span
class="o">=</span>/usr/data/spark-2.1.1-bin-2.6.3
+<span class="c">#livy命令目录</span>
+<span class="nb">export </span><span class="nv">LIVY_HOME</span><span
class="o">=</span>/usr/data/livy/bin
+<span class="c">#hadoop配置文件目录</span>
+<span class="nb">export </span><span class="nv">HADOOP_CONF_DIR</span><span
class="o">=</span>/usr/data/conf
+</code></pre></div></div>
+
+<p>4、Livy配置:</p>
+
+<p>更新livy/conf下的livy.conf配置文件:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>livy.server.host = 127.0.0.1
+livy.spark.master = yarn
+livy.spark.deployMode = cluster
+livy.repl.enable-hive-context = true
+</code></pre></div></div>
+
+<p>启动livy:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>livy-server start
+</code></pre></div></div>
+
+<p>5、Elasticsearch配置:</p>
+
+<p>在ES里创建griffin索引:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>curl -XPUT http://es:9200/griffin -d '
+{
+ "aliases": {},
+ "mappings": {
+ "accuracy": {
+ "properties": {
+ "name": {
+ "fields": {
+ "keyword": {
+ "ignore_above": 256,
+ "type": "keyword"
+ }
+ },
+ "type": "text"
+ },
+ "tmst": {
+ "type": "date"
+ }
+ }
+ }
+ },
+ "settings": {
+ "index": {
+ "number_of_replicas": "2",
+ "number_of_shards": "5"
+ }
+ }
+}
+'
+</code></pre></div></div>
+
+<h4 id="源码打包部署">源码打包部署</h4>
+
+<p>在这里我使用源码编译打包的方式来部署Griffin,Griffin的源码地址是:<a
href="https://github.com/apache/griffin.git">https://github.com/apache/griffin.git</a>,这里我使用的源码tag是griffin-0.4.0,下载完成在idea中导入并展开源码的结构图如下:</p>
+
+<p><img src="/images/project.jpg" alt="project" /></p>
+
+<p>Griffin的源码结构很清晰,主要包括griffin-doc、measure、service和ui四个模块,其中griffin-doc负责存放Griffin的文档,measure负责与spark交互,执行统计任务,service使用spring
boot作为服务实现,负责给ui模块提供交互所需的restful api,保存统计任务,展示统计结果。</p>
+
+<p>源码导入构建完毕后,需要修改配置文件,具体修改的配置文件如下:</p>
+
+<p>1、service/src/main/resources/application.properties:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code># Apache Griffin应用名称
+spring.application.name=griffin_service
+# MySQL数据库配置信息
+spring.datasource.url=jdbc:mysql://10.104.20.126:3306/griffin_quartz?useSSL=false
+spring.datasource.username=xnuser
+spring.datasource.password=xxxxx
+spring.jpa.generate-ddl=true
+spring.datasource.driver-class-name=com.mysql.jdbc.Driver
+spring.jpa.show-sql=true
+# Hive metastore配置信息
+hive.metastore.uris=thrift://namenodetest01.bi:9083
+hive.metastore.dbname=default
+hive.hmshandler.retry.attempts=15
+hive.hmshandler.retry.interval=2000ms
+# Hive cache time
+cache.evict.hive.fixedRate.in.milliseconds=900000
+# Kafka schema registry,按需配置
+kafka.schema.registry.url=http://namenodetest01.bi:8081
+# Update job instance state at regular intervals
+jobInstance.fixedDelay.in.milliseconds=60000
+# Expired time of job instance which is 7 days that is 604800000
milliseconds.Time unit only supports milliseconds
+jobInstance.expired.milliseconds=604800000
+# schedule predicate job every 5 minutes and repeat 12 times at most
+#interval time unit s:second m:minute h:hour d:day,only support these four
units
+predicate.job.interval=5m
+predicate.job.repeat.count=12
+# external properties directory location
+external.config.location=
+# external BATCH or STREAMING env
+external.env.location=
+# login strategy ("default" or "ldap")
+login.strategy=default
+# ldap,登录策略为ldap时配置
+ldap.url=ldap://hostname:port
+ldap.email=@example.com
+ldap.searchBase=DC=org,DC=example
+ldap.searchPattern=(sAMAccountName={0})
+# hdfs default name
+fs.defaultFS=
+# elasticsearch配置
+elasticsearch.host=griffindq02-test1-rgtj1-tj1
+elasticsearch.port=9200
+elasticsearch.scheme=http
+# elasticsearch.user = user
+# elasticsearch.password = password
+# livy配置
+livy.uri=http://10.104.110.116:8998/batches
+# yarn url配置
+yarn.uri=http://10.104.110.116:8088
+# griffin event listener
+internal.event.listeners=GriffinJobEventHook
+</code></pre></div></div>
+
+<p>2、service/src/main/resources/quartz.properties</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+org.quartz.scheduler.instanceName=spring-boot-quartz
+org.quartz.scheduler.instanceId=AUTO
+org.quartz.threadPool.threadCount=5
+org.quartz.jobStore.class=org.quartz.impl.jdbcjobstore.JobStoreTX
+# If you use postgresql as your database,set this property value to
org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
+# If you use mysql as your database,set this property value to
org.quartz.impl.jdbcjobstore.StdJDBCDelegate
+# If you use h2 as your database, it's ok to set this property value to
StdJDBCDelegate, PostgreSQLDelegate or others
+org.quartz.jobStore.driverDelegateClass=org.quartz.impl.jdbcjobstore.StdJDBCDelegate
+org.quartz.jobStore.useProperties=true
+org.quartz.jobStore.misfireThreshold=60000
+org.quartz.jobStore.tablePrefix=QRTZ_
+org.quartz.jobStore.isClustered=true
+org.quartz.jobStore.clusterCheckinInterval=20000
+</code></pre></div></div>
+
+<p>3、service/src/main/resources/sparkProperties.json:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>{
+ "file": "hdfs:///griffin/griffin-measure.jar",
+ "className": "org.apache.griffin.measure.Application",
+ "name": "griffin",
+ "queue": "default",
+ "numExecutors": 2,
+ "executorCores": 1,
+ "driverMemory": "1g",
+ "executorMemory": "1g",
+ "conf": {
+ "spark.yarn.dist.files": "hdfs:///home/spark_conf/hive-site.xml"
+ },
+ "files": [
+ ]
+}
+</code></pre></div></div>
+
+<p>4、service/src/main/resources/env/env_batch.json:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>{
+ "spark": {
+ "log.level": "INFO"
+ },
+ "sinks": [
+ {
+ "type": "CONSOLE",
+ "config": {
+ "max.log.lines": 10
+ }
+ },
+ {
+ "type": "HDFS",
+ "config": {
+ "path": "hdfs://namenodetest01.bi.10101111.com:9001/griffin/persist",
+ "max.persist.lines": 10000,
+ "max.lines.per.file": 10000
+ }
+ },
+ {
+ "type": "ELASTICSEARCH",
+ "config": {
+ "method": "post",
+ "api": "http://10.104.110.119:9200/griffin/accuracy",
+ "connection.timeout": "1m",
+ "retry": 10
+ }
+ }
+ ],
+ "griffin.checkpoint": []
+}
+</code></pre></div></div>
+
+<p>配置文件修改好后,在idea里的terminal里执行如下maven命令进行编译打包:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>mvn -Dmaven.test.skip=true clean install
+</code></pre></div></div>
+
+<p>命令执行完成后,会在service和measure模块的target目录下分别看到service-0.4.0.jar和measure-0.4.0.jar两个jar,将这两个jar分别拷贝到服务器目录下。这两个jar的使用方式如下:</p>
+
+<p>1、使用如下命令将measure-0.4.0.jar这个jar上传到HDFS的/griffin文件目录里:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>#改变jar名称
+mv measure-0.4.0.jar griffin-measure.jar
+#上传griffin-measure.jar到HDFS文件目录里
+hadoop fs -put griffin-measure.jar /griffin/
+</code></pre></div></div>
+
+<p>这样做的目的主要是因为spark在yarn集群上执行任务时,需要到HDFS的/griffin目录下加载griffin-measure.jar,避免发生类org.apache.griffin.measure.Application找不到的错误。</p>
+
+<p>2、运行service-0.4.0.jar,启动Griffin管理后台:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>nohup java -jar service-0.4.0.jar>service.out
2>&1 &
+</code></pre></div></div>
+
+<p>几秒钟后,我们可以访问Apache Griffin的默认UI(默认情况下,spring boot的端口是8080)。</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>http://IP:8080
+</code></pre></div></div>
+
+<p>UI操作文档链接:<a
href="https://github.com/apache/griffin/blob/master/griffin-doc/ui/user-guide.md">Apache
Griffin User Guide</a>。通过UI操作界面,我们可以创建自己的统计任务,部分结果展示界面如下:</p>
+
+<p><img src="/images/dashboard-big.png" alt="dashboard" /></p>
+
+<h4 id="功能体验">功能体验</h4>
+
+<p>1、在hive里创建表demo_src和demo_tgt:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>--create hive tables here. hql script
+--Note: replace hdfs location with your own path
+CREATE EXTERNAL TABLE `demo_src`(
+ `id` bigint,
+ `age` int,
+ `desc` string)
+PARTITIONED BY (
+ `dt` string,
+ `hour` string)
+ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+LOCATION
+ 'hdfs:///griffin/data/batch/demo_src';
+
+--Note: replace hdfs location with your own path
+CREATE EXTERNAL TABLE `demo_tgt`(
+ `id` bigint,
+ `age` int,
+ `desc` string)
+PARTITIONED BY (
+ `dt` string,
+ `hour` string)
+ROW FORMAT DELIMITED
+ FIELDS TERMINATED BY '|'
+LOCATION
+ 'hdfs:///griffin/data/batch/demo_tgt';
+</code></pre></div></div>
+
+<p>2、生成测试数据:</p>
+
+<p>从<a
href="http://griffin.apache.org/data/batch/">http://griffin.apache.org/data/batch/</a>地址下载所有文件到Hadoop服务器上,然后使用如下命令执行gen-hive-data.sh脚本:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code>nohup ./gen-hive-data.sh>gen.out 2>&1 &
+</code></pre></div></div>
+
+<p>注意观察gen.out日志文件,如果有错误,视情况进行调整。这里我的测试环境Hadoop和Hive安装在同一台服务器上,因此直接运行脚本。</p>
+
+<p>3、通过UI界面创建统计任务,具体按照<a
href="https://github.com/apache/griffin/blob/master/griffin-doc/ui/user-guide.md">Apache
Griffin User Guide</a>
+一步步操作。</p>
+
+<h3 id="踩坑过程">踩坑过程</h3>
+
+<p>1、gen-hive-data.sh脚本生成数据失败,报no such file or directory错误。</p>
+
+<p>错误原因:HDFS中的/griffin/data/batch/demo_src/和/griffin/data/batch/demo_tgt/目录下“dt=时间”目录不存在,如dt=20190113。</p>
+
+<p>解决办法:给脚本中增加hadoop fs -mkdir创建目录操作,修改完后如下:</p>
+
+<div class="highlighter-rouge"><div class="highlight"><pre
class="highlight"><code><span class="c">#!/bin/bash</span>
+
+<span class="c">#create table</span>
+hive <span class="nt">-f</span> create-table.hql
+<span class="nb">echo</span> <span class="s2">"create table done"</span>
+
+<span class="c">#current hour</span>
+<span class="nb">sudo</span> ./gen_demo_data.sh
+<span class="nv">cur_date</span><span class="o">=</span><span
class="sb">`</span><span class="nb">date</span> +%Y%m%d%H<span
class="sb">`</span>
+<span class="nv">dt</span><span class="o">=</span><span
class="k">${</span><span class="nv">cur_date</span>:0:8<span class="k">}</span>
+<span class="nv">hour</span><span class="o">=</span><span
class="k">${</span><span class="nv">cur_date</span>:8:2<span class="k">}</span>
+<span class="nv">partition_date</span><span class="o">=</span><span
class="s2">"dt='</span><span class="nv">$dt</span><span
class="s2">',hour='</span><span class="nv">$hour</span><span
class="s2">'"</span>
+<span class="nb">sed </span>s/PARTITION_DATE/<span
class="nv">$partition_date</span>/ ./insert-data.hql.template <span
class="o">></span> insert-data.hql
+hive <span class="nt">-f</span> insert-data.hql
+<span class="nv">src_done_path</span><span
class="o">=</span>/griffin/data/batch/demo_src/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>/_DONE
+<span class="nv">tgt_done_path</span><span
class="o">=</span>/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>/_DONE
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span>
/griffin/data/batch/demo_src/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span>
/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span
class="nv">src_done_path</span><span class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span
class="nv">tgt_done_path</span><span class="k">}</span>
+<span class="nb">echo</span> <span class="s2">"insert data [</span><span
class="nv">$partition_date</span><span class="s2">] done"</span>
+
+<span class="c">#last hour</span>
+<span class="nb">sudo</span> ./gen_demo_data.sh
+<span class="nv">cur_date</span><span class="o">=</span><span
class="sb">`</span><span class="nb">date</span> <span class="nt">-d</span>
<span class="s1">'1 hour ago'</span> +%Y%m%d%H<span class="sb">`</span>
+<span class="nv">dt</span><span class="o">=</span><span
class="k">${</span><span class="nv">cur_date</span>:0:8<span class="k">}</span>
+<span class="nv">hour</span><span class="o">=</span><span
class="k">${</span><span class="nv">cur_date</span>:8:2<span class="k">}</span>
+<span class="nv">partition_date</span><span class="o">=</span><span
class="s2">"dt='</span><span class="nv">$dt</span><span
class="s2">',hour='</span><span class="nv">$hour</span><span
class="s2">'"</span>
+<span class="nb">sed </span>s/PARTITION_DATE/<span
class="nv">$partition_date</span>/ ./insert-data.hql.template <span
class="o">></span> insert-data.hql
+hive <span class="nt">-f</span> insert-data.hql
+<span class="nv">src_done_path</span><span
class="o">=</span>/griffin/data/batch/demo_src/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>/_DONE
+<span class="nv">tgt_done_path</span><span
class="o">=</span>/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>/_DONE
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span>
/griffin/data/batch/demo_src/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>
+hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span>
/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span
class="nv">src_done_path</span><span class="k">}</span>
+hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span
class="nv">tgt_done_path</span><span class="k">}</span>
+<span class="nb">echo</span> <span class="s2">"insert data [</span><span
class="nv">$partition_date</span><span class="s2">] done"</span>
+
+<span class="c">#next hours</span>
+<span class="nb">set</span> +e
+<span class="k">while </span><span class="nb">true
+</span><span class="k">do
+ </span><span class="nb">sudo</span> ./gen_demo_data.sh
+ <span class="nv">cur_date</span><span class="o">=</span><span
class="sb">`</span><span class="nb">date</span> +%Y%m%d%H<span
class="sb">`</span>
+ <span class="nv">next_date</span><span class="o">=</span><span
class="sb">`</span><span class="nb">date</span> <span class="nt">-d</span>
<span class="s2">"+1hour"</span> <span class="s1">'+%Y%m%d%H'</span><span
class="sb">`</span>
+ <span class="nv">dt</span><span class="o">=</span><span
class="k">${</span><span class="nv">next_date</span>:0:8<span class="k">}</span>
+ <span class="nv">hour</span><span class="o">=</span><span
class="k">${</span><span class="nv">next_date</span>:8:2<span class="k">}</span>
+ <span class="nv">partition_date</span><span class="o">=</span><span
class="s2">"dt='</span><span class="nv">$dt</span><span
class="s2">',hour='</span><span class="nv">$hour</span><span
class="s2">'"</span>
+ <span class="nb">sed </span>s/PARTITION_DATE/<span
class="nv">$partition_date</span>/ ./insert-data.hql.template <span
class="o">></span> insert-data.hql
+ hive <span class="nt">-f</span> insert-data.hql
+ <span class="nv">src_done_path</span><span
class="o">=</span>/griffin/data/batch/demo_src/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>/_DONE
+ <span class="nv">tgt_done_path</span><span
class="o">=</span>/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>/_DONE
+ hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span>
/griffin/data/batch/demo_src/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>
+ hadoop fs <span class="nt">-mkdir</span> <span class="nt">-p</span>
/griffin/data/batch/demo_tgt/dt<span class="o">=</span><span
class="k">${</span><span class="nv">dt</span><span class="k">}</span>/hour<span
class="o">=</span><span class="k">${</span><span class="nv">hour</span><span
class="k">}</span>
+ hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span
class="nv">src_done_path</span><span class="k">}</span>
+ hadoop fs <span class="nt">-touchz</span> <span class="k">${</span><span
class="nv">tgt_done_path</span><span class="k">}</span>
+ <span class="nb">echo</span> <span class="s2">"insert data [</span><span
class="nv">$partition_date</span><span class="s2">] done"</span>
+ <span class="nb">sleep </span>3600
+<span class="k">done
+</span><span class="nb">set</span> <span class="nt">-e</span>
+</code></pre></div></div>
+
+<p>2、HDFS的/griffin/persist目录下没有统计结果文件,检查该目录的权限,设置合适的权限即可。</p>
+
+<p>3、ES中的metric数据为空,有两种可能:</p>
+
+<ul>
+ <li>service/src/main/resources/env/env_batch.json里的ES配置信息不正确</li>
+ <li>执行spark任务的yarn服务器上没有配置ES服务器的hostname,连接异常</li>
+</ul>
+
+<p>4、启动service-0.4.0.jar之后,访问不到UI界面,查看启动日志无异常。检查打包时是不是执行的mvn
package命令,将该命令替换成mvn -Dmaven.test.skip=true clean install命令重新打包启动即可。</p>
+
+
+ </div><!--end of loadcontent-->
+ </div>
+ <!--end of centered content-->
+ </div>
+</div>
+<!--end of container-->
+
+
+<!-- footer start -->
+<div class="footerwrapper">
+ <div class="container">
+ <div class="row">
+ <div class="col-md-3">
+ <img src="/images/incubator_feather_egg_logo.png" height="60" alt="Apache Incubator logo">
+ </div>
+ <div class="col-md-9">
+ <div style="margin-left:auto; margin-right:auto;
text-align:center;font-size:12px;">
+ <div>
+ Apache Griffin is an effort undergoing incubation at
The Apache Software Foundation (ASF), sponsored by the Apache Incubator.
Incubation is required of all newly accepted projects until a further review
indicates that the infrastructure, communications, and decision making process
have stabilized in a manner consistent with other successful ASF projects.
While incubation status is not necessarily a reflection of the completeness or
stability of the code, it does i [...]
+ </div>
+ </div>
+ </div>
+ </div>
+ <div class="row" style="padding-top:10px;">
+ Copyright © 2018 The Apache Software Foundation, Licensed under
the <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache License,
Version 2.0</a>.<br>
+ Apache Griffin, Griffin, Apache, the Apache feather
logo and the Apache Griffin logo are trademarks of The Apache Software
Foundation.
+ </div>
+ <div class="row text-center" style="padding-top:10px;">
+ <a
href="https://www.apache.org/events/current-event.html">
+ <img
src="https://www.apache.org/events/current-event-234x60.png" alt="ASF Current
Event">
+ </a>
+ </div>
+ </div>
+</div>
+<!-- footer end -->
+
+<!-- JavaScripts -->
+<script src="https://code.jquery.com/jquery-2.2.4.min.js"></script>
+
+
+
+</body>
+</html>
diff --git a/docs/quickstart.html b/docs/quickstart.html
index 512b63d..dba69d5 100644
--- a/docs/quickstart.html
+++ b/docs/quickstart.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli current"><a href="/docs/quickstart.html"
data-permalink="/docs/quickstart.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/quickstart.html" id="">Quick Start (Chinese
Version)</a></li>
+
<li class="sidenavli "><a href="/docs/usecases.html"
data-permalink="/docs/quickstart.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/quickstart.html" id="">Profiling Use Cases</a></li>
diff --git a/docs/usecases.html b/docs/usecases.html
index 7cdc993..4d186b2 100644
--- a/docs/usecases.html
+++ b/docs/usecases.html
@@ -97,6 +97,8 @@ under the License.
<li class="sidenavli "><a href="/docs/quickstart.html"
data-permalink="/docs/usecases.html" id="">Quick Start</a></li>
+ <li class="sidenavli "><a href="/docs/quickstart-cn.html"
data-permalink="/docs/usecases.html" id="">Quick Start (Chinese
Version)</a></li>
+
<li class="sidenavli current"><a href="/docs/usecases.html"
data-permalink="/docs/usecases.html" id="">Streaming Use Cases</a></li>
<li class="sidenavli "><a href="/docs/profiling.html"
data-permalink="/docs/usecases.html" id="">Profiling Use Cases</a></li>
diff --git a/images/arch-1.png b/images/arch-1.png
new file mode 100644
index 0000000..93bc755
Binary files /dev/null and b/images/arch-1.png differ
diff --git a/images/dashboard-big.png b/images/dashboard-big.png
new file mode 100644
index 0000000..aa796b6
Binary files /dev/null and b/images/dashboard-big.png differ
diff --git a/images/project.jpg b/images/project.jpg
new file mode 100644
index 0000000..6f446f2
Binary files /dev/null and b/images/project.jpg differ