Repository: incubator-griffin-site
Updated Branches:
  refs/heads/asf-site 8ef198e4b -> fbbfc965b


Site updated: 2018-03-14 16:37:02


Project: http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/commit/fbbfc965
Tree: 
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/tree/fbbfc965
Diff: 
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/diff/fbbfc965

Branch: refs/heads/asf-site
Commit: fbbfc965b540fa59b93284c017008deae77bf208
Parents: 8ef198e
Author: guoyp <gu...@apache.org>
Authored: Wed Mar 14 16:37:03 2018 +0800
Committer: guoyp <gu...@apache.org>
Committed: Wed Mar 14 16:37:03 2018 +0800

----------------------------------------------------------------------
 2017/03/03/plan/index.html      |  14 +-
 2017/03/04/community/index.html |  20 +--
 2017/11/07/release/index.html   |  29 +++--
 2018/03/10/home/index.html      | 239 +++++++++++++++++++++++++++++++++++
 archives/2017/03/index.html     |  29 +----
 archives/2017/11/index.html     |  10 +-
 archives/2017/index.html        |  29 +----
 archives/2018/03/index.html     | 170 +++++++++++++++++++++++++
 archives/2018/index.html        | 170 +++++++++++++++++++++++++
 archives/index.html             |  34 +++--
 index.html                      | 186 +++++++++++++--------------
 11 files changed, 745 insertions(+), 185 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/2017/03/03/plan/index.html
----------------------------------------------------------------------
diff --git a/2017/03/03/plan/index.html b/2017/03/03/plan/index.html
index 3ac42af..74c7cd5 100644
--- a/2017/03/03/plan/index.html
+++ b/2017/03/03/plan/index.html
@@ -107,9 +107,9 @@ profiling target data asset, providing statistics by 
differen">
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com";></form>
@@ -237,7 +237,7 @@ profiling target data asset, providing statistics by 
differen">
       
     </div>
     <footer class="article-footer">
-      <a data-url="http://yoursite.com/2017/03/03/plan/"; 
data-id="cjeqth40x0001rzpomekh1njt" class="article-share-link">Share</a>
+      <a data-url="http://yoursite.com/2017/03/03/plan/"; 
data-id="cjequ0plm0000c2ponumo1lae" class="article-share-link">Partager</a>
       
       
     </footer>
@@ -247,7 +247,7 @@ profiling target data asset, providing statistics by 
differen">
 <nav id="article-nav">
   
     <a href="/2017/03/04/community/" id="article-nav-newer" 
class="article-nav-link-wrap">
-      <strong class="article-nav-caption">Newer</strong>
+      <strong class="article-nav-caption">Récent</strong>
       <div class="article-nav-title">
         
           Community
@@ -272,11 +272,11 @@ profiling target data asset, providing statistics by 
differen">
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -306,7 +306,7 @@ profiling target data asset, providing statistics by 
differen">
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/"; target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/"; target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/2017/03/04/community/index.html
----------------------------------------------------------------------
diff --git a/2017/03/04/community/index.html b/2017/03/04/community/index.html
index 01d2f23..855662a 100644
--- a/2017/03/04/community/index.html
+++ b/2017/03/04/community/index.html
@@ -65,9 +65,9 @@ To unsub">
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com";></form>
@@ -134,7 +134,7 @@ To unsub">
       
     </div>
     <footer class="article-footer">
-      <a data-url="http://yoursite.com/2017/03/04/community/"; 
data-id="cjeqth40t0000rzpofzb69p9l" class="article-share-link">Share</a>
+      <a data-url="http://yoursite.com/2017/03/04/community/"; 
data-id="cjequ0pls0002c2pocjijfgio" class="article-share-link">Partager</a>
       
       
     </footer>
@@ -143,18 +143,18 @@ To unsub">
     
 <nav id="article-nav">
   
-    <a href="/2017/03/30/home/" id="article-nav-newer" 
class="article-nav-link-wrap">
-      <strong class="article-nav-caption">Newer</strong>
+    <a href="/2017/11/07/release/" id="article-nav-newer" 
class="article-nav-link-wrap">
+      <strong class="article-nav-caption">Récent</strong>
       <div class="article-nav-title">
         
-          Apache Griffin
+          Release
         
       </div>
     </a>
   
   
     <a href="/2017/03/03/plan/" id="article-nav-older" 
class="article-nav-link-wrap">
-      <strong class="article-nav-caption">Older</strong>
+      <strong class="article-nav-caption">Ancien</strong>
       <div class="article-nav-title">Plan</div>
     </a>
   
@@ -174,11 +174,11 @@ To unsub">
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -208,7 +208,7 @@ To unsub">
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/"; target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/"; target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/2017/11/07/release/index.html
----------------------------------------------------------------------
diff --git a/2017/11/07/release/index.html b/2017/11/07/release/index.html
index 407f699..55cdafb 100644
--- a/2017/11/07/release/index.html
+++ b/2017/11/07/release/index.html
@@ -19,7 +19,7 @@ Highlights
 Streaming: measure streaming data quality based on defined measurements.
 Support Griffin DSL and SQL to define data quality measurement.
 Su">
-<meta property="og:updated_time" content="2017-11-29T02:38:41.000Z">
+<meta property="og:updated_time" content="2018-03-14T08:35:01.000Z">
 <meta name="twitter:card" content="summary">
 <meta name="twitter:title" content="Release">
 <meta name="twitter:description" content="Release Notes - Apache Griffin 0.1.6 
(incubating)
@@ -68,9 +68,9 @@ Su">
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com";></form>
@@ -193,7 +193,7 @@ Su">
       
     </div>
     <footer class="article-footer">
-      <a data-url="http://yoursite.com/2017/11/07/release/"; 
data-id="cjeqth4110003rzpo89pdl2vz" class="article-share-link">Share</a>
+      <a data-url="http://yoursite.com/2017/11/07/release/"; 
data-id="cjequ0plt0003c2po150098k6" class="article-share-link">Partager</a>
       
       
     </footer>
@@ -202,10 +202,19 @@ Su">
     
 <nav id="article-nav">
   
+    <a href="/2018/03/10/home/" id="article-nav-newer" 
class="article-nav-link-wrap">
+      <strong class="article-nav-caption">Récent</strong>
+      <div class="article-nav-title">
+        
+          Apache Griffin
+        
+      </div>
+    </a>
   
-    <a href="/2017/03/30/home/" id="article-nav-older" 
class="article-nav-link-wrap">
-      <strong class="article-nav-caption">Older</strong>
-      <div class="article-nav-title">Apache Griffin</div>
+  
+    <a href="/2017/03/04/community/" id="article-nav-older" 
class="article-nav-link-wrap">
+      <strong class="article-nav-caption">Ancien</strong>
+      <div class="article-nav-title">Community</div>
     </a>
   
 </nav>
@@ -224,11 +233,11 @@ Su">
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -258,7 +267,7 @@ Su">
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/"; target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/"; target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/2018/03/10/home/index.html
----------------------------------------------------------------------
diff --git a/2018/03/10/home/index.html b/2018/03/10/home/index.html
new file mode 100644
index 0000000..0e8c0ed
--- /dev/null
+++ b/2018/03/10/home/index.html
@@ -0,0 +1,239 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  
+  <title>Apache Griffin | Apache Griffin</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1, 
maximum-scale=1">
+  <meta name="description" content="AbstractApache Griffin is a Data Quality 
Service platform built on Apache Hadoop and Apache Spark. It provides a 
framework process for defining data quality model, executing data quality 
measurement,">
+<meta property="og:type" content="article">
+<meta property="og:title" content="Apache Griffin">
+<meta property="og:url" 
content="http://yoursite.com/2018/03/10/home/index.html">
+<meta property="og:site_name" content="Apache Griffin">
+<meta property="og:description" content="AbstractApache Griffin is a Data 
Quality Service platform built on Apache Hadoop and Apache Spark. It provides a 
framework process for defining data quality model, executing data quality 
measurement,">
+<meta property="og:image" 
content="http://yoursite.com/images/Business_Process.png">
+<meta property="og:image" content="http://yoursite.com/images/arch.png">
+<meta property="og:image" content="http://yoursite.com/images/techstack.png">
+<meta property="og:updated_time" content="2018-03-14T08:35:11.000Z">
+<meta name="twitter:card" content="summary">
+<meta name="twitter:title" content="Apache Griffin">
+<meta name="twitter:description" content="AbstractApache Griffin is a Data 
Quality Service platform built on Apache Hadoop and Apache Spark. It provides a 
framework process for defining data quality model, executing data quality 
measurement,">
+<meta name="twitter:image" 
content="http://yoursite.com/images/Business_Process.png">
+  
+    <link rel="alternate" href="/atom.xml" title="Apache Griffin" 
type="application/atom+xml">
+  
+  
+    <link rel="icon" href="/favicon.png">
+  
+  
+    <link href="//fonts.googleapis.com/css?family=Source+Code+Pro" 
rel="stylesheet" type="text/css">
+  
+  <link rel="stylesheet" href="/css/style.css">
+  
+
+</head>
+
+<body>
+  <div id="container">
+    <div id="wrap">
+      <header id="header">
+  <div id="banner"></div>
+  <div id="header-outer" class="outer">
+    <div id="header-title" class="inner">
+      <h1 id="logo-wrap">
+        <a href="/" id="logo">Apache Griffin</a>
+      </h1>
+      
+        <h2 id="subtitle-wrap">
+          <a href="/" id="subtitle">Data Quality Proposal for Streaming and 
Batch</a>
+        </h2>
+      
+    </div>
+    <div id="header-inner" class="inner">
+      <nav id="main-nav">
+        <a id="main-nav-toggle" class="nav-icon"></a>
+        
+          <a class="main-nav-link" href="/">Home</a>
+        
+          <a class="main-nav-link" href="/archives">Archives</a>
+        
+      </nav>
+      <nav id="sub-nav">
+        
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
+        
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
+      </nav>
+      <div id="search-form-wrap">
+        <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com"></form>
+      </div>
+    </div>
+  </div>
+</header>
+      <div class="outer">
+        <section id="main"><article id="post-home" class="article 
article-type-post" itemscope itemprop="blogPost">
+  <div class="article-meta">
+    <a href="/2018/03/10/home/" class="article-date">
+  <time datetime="2018-03-10T02:49:47.000Z" 
itemprop="datePublished">2018-03-10</time>
+</a>
+    
+  </div>
+  <div class="article-inner">
+    
+    
+      <header class="article-header">
+        
+  
+    <h1 class="article-title" itemprop="name">
+      Apache Griffin
+    </h1>
+  
+
+      </header>
+    
+    <div class="article-entry" itemprop="articleBody">
+      
+        <h2 id="Abstract"><a href="#Abstract" class="headerlink" 
title="Abstract"></a>Abstract</h2><p>Apache Griffin is a Data Quality Service 
platform built on Apache Hadoop and Apache Spark. It provides a framework 
process for defining data quality model, executing data quality measurement, 
automating data profiling and validation, as well as a unified data quality 
visualization across multiple data systems.  It tries to address the data 
quality challenges in big data and streaming context.</p>
+<h2 id="Overview-of-Apache-Griffin"><a href="#Overview-of-Apache-Griffin" 
class="headerlink" title="Overview of Apache Griffin"></a>Overview of Apache 
Griffin</h2><p>When people use big data (Hadoop or other streaming systems), 
measurement of data quality is a big challenge. Different teams have built 
customized tools to detect and analyze data quality issues within their own 
domains. As a platform organization, we think of taking a platform approach to 
commonly occurring patterns. As such, we are building a platform to provide 
shared Infrastructure and generic features to solve common data quality pain 
points. This would enable us to build trusted data assets.</p>
+<p>Currently it is very difficult and costly to do data quality validation 
when we have large volumes of related data flowing across multi-platforms 
(streaming and batch). Take eBay’s Real-time Personalization Platform as an 
example: every day we have to validate the data quality for ~600M records. Data 
quality often becomes one big challenge in this complex environment and massive 
scale.</p>
+<p>We detect the following at eBay:</p>
+<ol>
+<li>Lack of an end-to-end, unified view of data quality from multiple data 
sources to target applications that takes into account the lineage of the data. 
This results in a long time to identify and fix data quality issues.</li>
+<li>Lack of a system to measure data quality in streaming mode through 
self-service. The need is for a system where datasets can be registered, data 
quality models can be defined, data quality can be visualized and monitored 
using a simple tool and teams alerted when an issue is detected.</li>
+<li>Lack of a Shared platform and API Service. Every team should not have to 
apply and manage own hardware and software infrastructure to solve this common 
problem.</li>
+</ol>
+<p>With these in mind, we decided to build Apache Griffin - A data quality 
service that aims to solve the above short-comings.</p>
+<p>Apache Griffin includes:</p>
+<p><strong>Data Quality Model Engine</strong>: Apache Griffin is a model-driven 
solution: users can choose various data quality dimensions to execute their 
data quality validation based on a selected target data-set or source data-set 
(as the golden reference data). It has a corresponding library supporting it in 
the back-end for the following measurements:</p>
+<ul>
+<li>Accuracy - Does data reflect the real-world objects or a verifiable 
source</li>
+<li>Completeness - Is all necessary data present</li>
+<li>Validity -  Are all data values within the data domains specified by the 
business</li>
+<li>Timeliness - Is the data available at the time needed</li>
+<li>Anomaly detection -  Pre-built algorithm functions for the identification 
of items, events or observations which do not conform to an expected pattern or 
other items in a dataset</li>
+<li>Data Profiling - Apply statistical analysis and assessment of data values 
within a dataset for consistency, uniqueness and logic.</li>
+</ul>
+<p><strong>Data Collection Layer</strong>:</p>
+<p>We support two kinds of data sources, batch data and real time data.</p>
+<p>For batch mode, we can collect data source from  our Hadoop platform by 
various data connectors.</p>
+<p>For real time mode, we can connect with a messaging system like Kafka for near 
real time analysis.</p>
+<p><strong>Data Process and Storage Layer</strong>:</p>
+<p>For batch analysis, our data quality model will compute data quality 
metrics in our spark cluster based on data source in hadoop.</p>
+<p>For near real time analysis, we consume data from messaging system, then 
our data quality model will compute our real time data quality metrics in our 
spark cluster. For data storage, we use a time series database in our back end to 
fulfill front end request.</p>
+<p><strong>Apache Griffin Service</strong>:</p>
+<p>We have RESTful web services to accomplish all the functionalities of 
Apache Griffin, such as register data-set, create data quality model, publish 
metrics, retrieve metrics, add subscription, etc. So, the developers can 
develop their own user interface based on these web services.</p>
+<h2 id="Main-business-process"><a href="#Main-business-process" 
class="headerlink" title="Main business process"></a>Main business 
process</h2><p><img src="/images/Business_Process.png" alt=""></p>
+<h2 id="Architecture-diagram"><a href="#Architecture-diagram" 
class="headerlink" title="Architecture diagram"></a>Architecture 
diagram</h2><p><img src="/images/arch.png" alt=""></p>
+<h2 id="Tech-stack"><a href="#Tech-stack" class="headerlink" title="Tech 
stack"></a>Tech stack</h2><p><img src="/images/techstack.png" alt=""></p>
+<h2 id="Rationale"><a href="#Rationale" class="headerlink" 
title="Rationale"></a>Rationale</h2><p>The challenge we face at eBay is that 
our data volume is becoming bigger and bigger and system processes are becoming more 
complex, while we do not have a unified data quality solution to ensure 
trusted data sets that provide confidence in data quality to our data 
consumers.  The key challenges in data quality include:</p>
+<ol>
+<li>Existing commercial data quality solution cannot address data quality 
lineage among systems, cannot scale out to support fast growing data at 
eBay</li>
+<li>Existing eBay’s domain specific tools take a long time to identify and 
fix poor data quality when data flowed through multiple systems</li>
+<li>Business logic becomes complex and requires a much more flexible data 
quality system.</li>
+<li>Some data quality issues do have business impact on user experiences, 
revenue, efficiency &amp; compliance.</li>
+<li>Communication overhead of data quality metrics, typically in a big 
organization, which involve different teams.</li>
+</ol>
+<p>The idea of Apache Griffin is to provide Data Quality validation as 
a Service, to allow data engineers and data consumers to have:</p>
+<ul>
+<li>Near real-time understanding of the data quality health of your data 
pipelines with end-to-end monitoring, all in one place.</li>
+<li>Profiling, detecting and correlating issues and providing recommendations 
that drive rapid and focused troubleshooting</li>
+<li>A centralized data quality model management system including rule, 
metadata, scheduler etc.  </li>
+<li>Native code generation to run everywhere, including Hadoop, Kafka, Spark, 
etc.</li>
+<li>One set of tools to build data quality pipelines across all eBay data 
platforms.</li>
+</ul>
+<h2 id="Disclaimer"><a href="#Disclaimer" class="headerlink" 
title="Disclaimer"></a>Disclaimer</h2><p>Apache Griffin is an effort undergoing 
incubation at The Apache Software Foundation (ASF), sponsored by the Apache 
Incubator. Incubation is required of all newly accepted projects until a 
further review indicates that the infrastructure, communications, and decision 
making process have stabilized in a manner consistent with other successful ASF 
projects. While incubation status is not necessarily a reflection of the 
completeness or stability of the code, it does indicate that the project has 
yet to be fully endorsed by the ASF.</p>
+
+      
+    </div>
+    <footer class="article-footer">
+      <a data-url="http://yoursite.com/2018/03/10/home/" 
data-id="cjequ0plq0001c2po1nm44ixq" class="article-share-link">Partager</a>
+      
+      
+    </footer>
+  </div>
+  
+    
+<nav id="article-nav">
+  
+  
+    <a href="/2017/11/07/release/" id="article-nav-older" 
class="article-nav-link-wrap">
+      <strong class="article-nav-caption">Ancien</strong>
+      <div class="article-nav-title">Release</div>
+    </a>
+  
+</nav>
+
+  
+</article>
+
+</section>
+        
+          <aside id="sidebar">
+  
+    
+  <div class="widget-wrap">
+    <h3 class="widget-title">Links</h3>
+    <div class="widget">
+      <ul>
+        
+          <li>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
+          </li>
+        
+          <li>
+            <a href="/2017/11/07/release/">Release</a>
+          </li>
+        
+          <li>
+            <a href="/2017/03/04/community/">Community</a>
+          </li>
+        
+          <li>
+            <a href="/2017/03/03/plan/">Plan</a>
+          </li>
+        
+      </ul>
+    </div>
+  </div>
+
+  
+      <div class="widget-wrap">
+    <div class="widget">
+        <img 
src="http://griffin.incubator.apache.org/images/egg-logo.png">
+    </div>
+  </div>
+  
+</aside>
+        
+      </div>
+      <footer id="footer">
+  
+  <div class="outer">
+    <div id="footer-info" class="inner">
+      &copy; 2018
+      Propulsé by <a href="http://hexo.io/" target="_blank">Hexo</a>
+    </div>
+  </div>
+</footer>
+    </div>
+    <nav id="mobile-nav">
+  
+    <a href="/" class="mobile-nav-link">Home</a>
+  
+    <a href="/archives" class="mobile-nav-link">Archives</a>
+  
+</nav>
+    
+
+<script 
src="//ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
+
+
+  <link rel="stylesheet" href="/fancybox/jquery.fancybox.css">
+  <script src="/fancybox/jquery.fancybox.pack.js"></script>
+
+
+<script src="/js/script.js"></script>
+
+  </div>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/archives/2017/03/index.html
----------------------------------------------------------------------
diff --git a/archives/2017/03/index.html b/archives/2017/03/index.html
index 54f8fda..40486b5 100644
--- a/archives/2017/03/index.html
+++ b/archives/2017/03/index.html
@@ -52,9 +52,9 @@
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com";></form>
@@ -79,25 +79,6 @@
     <article class="archive-article archive-type-post">
   <div class="archive-article-inner">
     <header class="archive-article-header">
-      <a href="/2017/03/30/home/" class="archive-article-date">
-  <time datetime="2017-03-30T02:49:47.000Z" itemprop="datePublished">Mar 
30</time>
-</a>
-      
-  
-    <h1 itemprop="name">
-      <a class="archive-article-title" href="/2017/03/30/home/">Apache 
Griffin</a>
-    </h1>
-  
-
-    </header>
-  </div>
-</article>
-  
-    
-    
-    <article class="archive-article archive-type-post">
-  <div class="archive-article-inner">
-    <header class="archive-article-header">
       <a href="/2017/03/04/community/" class="archive-article-date">
   <time datetime="2017-03-04T05:00:45.000Z" itemprop="datePublished">Mar 
4</time>
 </a>
@@ -146,11 +127,11 @@
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -180,7 +161,7 @@
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/"; target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/"; target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/archives/2017/11/index.html
----------------------------------------------------------------------
diff --git a/archives/2017/11/index.html b/archives/2017/11/index.html
index 40553d2..c64dc97 100644
--- a/archives/2017/11/index.html
+++ b/archives/2017/11/index.html
@@ -52,9 +52,9 @@
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com";></form>
@@ -108,11 +108,11 @@
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -142,7 +142,7 @@
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/"; target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/"; target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/archives/2017/index.html
----------------------------------------------------------------------
diff --git a/archives/2017/index.html b/archives/2017/index.html
index 775fba5..e9bc577 100644
--- a/archives/2017/index.html
+++ b/archives/2017/index.html
@@ -52,9 +52,9 @@
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com";></form>
@@ -98,25 +98,6 @@
     <article class="archive-article archive-type-post">
   <div class="archive-article-inner">
     <header class="archive-article-header">
-      <a href="/2017/03/30/home/" class="archive-article-date">
-  <time datetime="2017-03-30T02:49:47.000Z" itemprop="datePublished">Mar 
30</time>
-</a>
-      
-  
-    <h1 itemprop="name">
-      <a class="archive-article-title" href="/2017/03/30/home/">Apache 
Griffin</a>
-    </h1>
-  
-
-    </header>
-  </div>
-</article>
-  
-    
-    
-    <article class="archive-article archive-type-post">
-  <div class="archive-article-inner">
-    <header class="archive-article-header">
       <a href="/2017/03/04/community/" class="archive-article-date">
   <time datetime="2017-03-04T05:00:45.000Z" itemprop="datePublished">Mar 
4</time>
 </a>
@@ -165,11 +146,11 @@
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -199,7 +180,7 @@
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/"; target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/"; target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/archives/2018/03/index.html
----------------------------------------------------------------------
diff --git a/archives/2018/03/index.html b/archives/2018/03/index.html
new file mode 100644
index 0000000..ef561bf
--- /dev/null
+++ b/archives/2018/03/index.html
@@ -0,0 +1,170 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  
+  <title>Archives: 2018/3 | Apache Griffin</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1, 
maximum-scale=1">
+  <meta property="og:type" content="website">
+<meta property="og:title" content="Apache Griffin">
+<meta property="og:url" 
content="http://yoursite.com/archives/2018/03/index.html">
+<meta property="og:site_name" content="Apache Griffin">
+<meta name="twitter:card" content="summary">
+<meta name="twitter:title" content="Apache Griffin">
+  
+    <link rel="alternate" href="/atom.xml" title="Apache Griffin" 
type="application/atom+xml">
+  
+  
+    <link rel="icon" href="/favicon.png">
+  
+  
+    <link href="//fonts.googleapis.com/css?family=Source+Code+Pro" 
rel="stylesheet" type="text/css">
+  
+  <link rel="stylesheet" href="/css/style.css">
+  
+
+</head>
+
+<body>
+  <div id="container">
+    <div id="wrap">
+      <header id="header">
+  <div id="banner"></div>
+  <div id="header-outer" class="outer">
+    <div id="header-title" class="inner">
+      <h1 id="logo-wrap">
+        <a href="/" id="logo">Apache Griffin</a>
+      </h1>
+      
+        <h2 id="subtitle-wrap">
+          <a href="/" id="subtitle">Data Quality Proposal for Streaming and 
Batch</a>
+        </h2>
+      
+    </div>
+    <div id="header-inner" class="inner">
+      <nav id="main-nav">
+        <a id="main-nav-toggle" class="nav-icon"></a>
+        
+          <a class="main-nav-link" href="/">Home</a>
+        
+          <a class="main-nav-link" href="/archives">Archives</a>
+        
+      </nav>
+      <nav id="sub-nav">
+        
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
+        
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
+      </nav>
+      <div id="search-form-wrap">
+        <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com"></form>
+      </div>
+    </div>
+  </div>
+</header>
+      <div class="outer">
+        <section id="main">
+  
+  
+    
+    
+      
+      
+      <section class="archives-wrap">
+        <div class="archive-year-wrap">
+          <a href="/archives/2018" class="archive-year">2018</a>
+        </div>
+        <div class="archives">
+    
+    <article class="archive-article archive-type-post">
+  <div class="archive-article-inner">
+    <header class="archive-article-header">
+      <a href="/2018/03/10/home/" class="archive-article-date">
+  <time datetime="2018-03-10T02:49:47.000Z" itemprop="datePublished">Mar 
10</time>
+</a>
+      
+  
+    <h1 itemprop="name">
+      <a class="archive-article-title" href="/2018/03/10/home/">Apache 
Griffin</a>
+    </h1>
+  
+
+    </header>
+  </div>
+</article>
+  
+  
+    </div></section>
+  
+
+</section>
+        
+          <aside id="sidebar">
+  
+    
+  <div class="widget-wrap">
+    <h3 class="widget-title">Links</h3>
+    <div class="widget">
+      <ul>
+        
+          <li>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
+          </li>
+        
+          <li>
+            <a href="/2017/11/07/release/">Release</a>
+          </li>
+        
+          <li>
+            <a href="/2017/03/04/community/">Community</a>
+          </li>
+        
+          <li>
+            <a href="/2017/03/03/plan/">Plan</a>
+          </li>
+        
+      </ul>
+    </div>
+  </div>
+
+  
+      <div class="widget-wrap">
+    <div class="widget">
+        <img 
src="http://griffin.incubator.apache.org/images/egg-logo.png"></img>
+    </div>
+  </div>
+  
+</aside>
+        
+      </div>
+      <footer id="footer">
+  
+  <div class="outer">
+    <div id="footer-info" class="inner">
+      &copy; 2018
+      Propulsé by <a href="http://hexo.io/" target="_blank">Hexo</a>
+    </div>
+  </div>
+</footer>
+    </div>
+    <nav id="mobile-nav">
+  
+    <a href="/" class="mobile-nav-link">Home</a>
+  
+    <a href="/archives" class="mobile-nav-link">Archives</a>
+  
+</nav>
+    
+
+<script 
src="//ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
+
+
+  <link rel="stylesheet" href="/fancybox/jquery.fancybox.css">
+  <script src="/fancybox/jquery.fancybox.pack.js"></script>
+
+
+<script src="/js/script.js"></script>
+
+  </div>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/archives/2018/index.html
----------------------------------------------------------------------
diff --git a/archives/2018/index.html b/archives/2018/index.html
new file mode 100644
index 0000000..901fc41
--- /dev/null
+++ b/archives/2018/index.html
@@ -0,0 +1,170 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+  
+  <title>Archives: 2018 | Apache Griffin</title>
+  <meta name="viewport" content="width=device-width, initial-scale=1, 
maximum-scale=1">
+  <meta property="og:type" content="website">
+<meta property="og:title" content="Apache Griffin">
+<meta property="og:url" content="http://yoursite.com/archives/2018/index.html">
+<meta property="og:site_name" content="Apache Griffin">
+<meta name="twitter:card" content="summary">
+<meta name="twitter:title" content="Apache Griffin">
+  
+    <link rel="alternate" href="/atom.xml" title="Apache Griffin" 
type="application/atom+xml">
+  
+  
+    <link rel="icon" href="/favicon.png">
+  
+  
+    <link href="//fonts.googleapis.com/css?family=Source+Code+Pro" 
rel="stylesheet" type="text/css">
+  
+  <link rel="stylesheet" href="/css/style.css">
+  
+
+</head>
+
+<body>
+  <div id="container">
+    <div id="wrap">
+      <header id="header">
+  <div id="banner"></div>
+  <div id="header-outer" class="outer">
+    <div id="header-title" class="inner">
+      <h1 id="logo-wrap">
+        <a href="/" id="logo">Apache Griffin</a>
+      </h1>
+      
+        <h2 id="subtitle-wrap">
+          <a href="/" id="subtitle">Data Quality Proposal for Streaming and 
Batch</a>
+        </h2>
+      
+    </div>
+    <div id="header-inner" class="inner">
+      <nav id="main-nav">
+        <a id="main-nav-toggle" class="nav-icon"></a>
+        
+          <a class="main-nav-link" href="/">Home</a>
+        
+          <a class="main-nav-link" href="/archives">Archives</a>
+        
+      </nav>
+      <nav id="sub-nav">
+        
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
+        
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
+      </nav>
+      <div id="search-form-wrap">
+        <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com"></form>
+      </div>
+    </div>
+  </div>
+</header>
+      <div class="outer">
+        <section id="main">
+  
+  
+    
+    
+      
+      
+      <section class="archives-wrap">
+        <div class="archive-year-wrap">
+          <a href="/archives/2018" class="archive-year">2018</a>
+        </div>
+        <div class="archives">
+    
+    <article class="archive-article archive-type-post">
+  <div class="archive-article-inner">
+    <header class="archive-article-header">
+      <a href="/2018/03/10/home/" class="archive-article-date">
+  <time datetime="2018-03-10T02:49:47.000Z" itemprop="datePublished">Mar 
10</time>
+</a>
+      
+  
+    <h1 itemprop="name">
+      <a class="archive-article-title" href="/2018/03/10/home/">Apache 
Griffin</a>
+    </h1>
+  
+
+    </header>
+  </div>
+</article>
+  
+  
+    </div></section>
+  
+
+</section>
+        
+          <aside id="sidebar">
+  
+    
+  <div class="widget-wrap">
+    <h3 class="widget-title">Links</h3>
+    <div class="widget">
+      <ul>
+        
+          <li>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
+          </li>
+        
+          <li>
+            <a href="/2017/11/07/release/">Release</a>
+          </li>
+        
+          <li>
+            <a href="/2017/03/04/community/">Community</a>
+          </li>
+        
+          <li>
+            <a href="/2017/03/03/plan/">Plan</a>
+          </li>
+        
+      </ul>
+    </div>
+  </div>
+
+  
+      <div class="widget-wrap">
+    <div class="widget">
+        <img 
src="http://griffin.incubator.apache.org/images/egg-logo.png"></img>
+    </div>
+  </div>
+  
+</aside>
+        
+      </div>
+      <footer id="footer">
+  
+  <div class="outer">
+    <div id="footer-info" class="inner">
+      &copy; 2018
+      Propulsé by <a href="http://hexo.io/" target="_blank">Hexo</a>
+    </div>
+  </div>
+</footer>
+    </div>
+    <nav id="mobile-nav">
+  
+    <a href="/" class="mobile-nav-link">Home</a>
+  
+    <a href="/archives" class="mobile-nav-link">Archives</a>
+  
+</nav>
+    
+
+<script 
src="//ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
+
+
+  <link rel="stylesheet" href="/fancybox/jquery.fancybox.css">
+  <script src="/fancybox/jquery.fancybox.pack.js"></script>
+
+
+<script src="/js/script.js"></script>
+
+  </div>
+</body>
+</html>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/archives/index.html
----------------------------------------------------------------------
diff --git a/archives/index.html b/archives/index.html
index 4e4b0ea..afca514 100644
--- a/archives/index.html
+++ b/archives/index.html
@@ -52,9 +52,9 @@
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com"></form>
@@ -72,20 +72,20 @@
       
       <section class="archives-wrap">
         <div class="archive-year-wrap">
-          <a href="/archives/2017" class="archive-year">2017</a>
+          <a href="/archives/2018" class="archive-year">2018</a>
         </div>
         <div class="archives">
     
     <article class="archive-article archive-type-post">
   <div class="archive-article-inner">
     <header class="archive-article-header">
-      <a href="/2017/11/07/release/" class="archive-article-date">
-  <time datetime="2017-11-07T01:15:12.000Z" itemprop="datePublished">Nov 
7</time>
+      <a href="/2018/03/10/home/" class="archive-article-date">
+  <time datetime="2018-03-10T02:49:47.000Z" itemprop="datePublished">Mar 
10</time>
 </a>
       
   
     <h1 itemprop="name">
-      <a class="archive-article-title" href="/2017/11/07/release/">Release</a>
+      <a class="archive-article-title" href="/2018/03/10/home/">Apache 
Griffin</a>
     </h1>
   
 
@@ -95,16 +95,26 @@
   
     
     
+      
+        </div></section>
+      
+      
+      <section class="archives-wrap">
+        <div class="archive-year-wrap">
+          <a href="/archives/2017" class="archive-year">2017</a>
+        </div>
+        <div class="archives">
+    
     <article class="archive-article archive-type-post">
   <div class="archive-article-inner">
     <header class="archive-article-header">
-      <a href="/2017/03/30/home/" class="archive-article-date">
-  <time datetime="2017-03-30T02:49:47.000Z" itemprop="datePublished">Mar 
30</time>
+      <a href="/2017/11/07/release/" class="archive-article-date">
+  <time datetime="2017-11-07T01:15:12.000Z" itemprop="datePublished">Nov 
7</time>
 </a>
       
   
     <h1 itemprop="name">
-      <a class="archive-article-title" href="/2017/03/30/home/">Apache 
Griffin</a>
+      <a class="archive-article-title" href="/2017/11/07/release/">Release</a>
     </h1>
   
 
@@ -165,11 +175,11 @@
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -199,7 +209,7 @@
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/" target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/" target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/fbbfc965/index.html
----------------------------------------------------------------------
diff --git a/index.html b/index.html
index eca758d..a89258f 100644
--- a/index.html
+++ b/index.html
@@ -52,9 +52,9 @@
       </nav>
       <nav id="sub-nav">
         
-          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS 
Feed"></a>
+          <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="Flux 
RSS"></a>
         
-        <a id="nav-search-btn" class="nav-icon" title="Search"></a>
+        <a id="nav-search-btn" class="nav-icon" title="Rechercher"></a>
       </nav>
       <div id="search-form-wrap">
         <form action="//google.com/search" method="get" accept-charset="UTF-8" 
class="search-form"><input type="search" name="q" results="0" 
class="search-form-input" placeholder="Search"><button type="submit" 
class="search-form-submit">&#xF002;</button><input type="hidden" 
name="sitesearch" value="http://yoursite.com"></form>
@@ -65,6 +65,91 @@
       <div class="outer">
         <section id="main">
   
+    <article id="post-home" class="article article-type-post" itemscope 
itemprop="blogPost">
+  <div class="article-meta">
+    <a href="/2018/03/10/home/" class="article-date">
+  <time datetime="2018-03-10T02:49:47.000Z" 
itemprop="datePublished">2018-03-10</time>
+</a>
+    
+  </div>
+  <div class="article-inner">
+    
+    
+      <header class="article-header">
+        
+  
+    <h1 itemprop="name">
+      <a class="article-title" href="/2018/03/10/home/">Apache Griffin</a>
+    </h1>
+  
+
+      </header>
+    
+    <div class="article-entry" itemprop="articleBody">
+      
+        <h2 id="Abstract"><a href="#Abstract" class="headerlink" 
title="Abstract"></a>Abstract</h2><p>Apache Griffin is a Data Quality Service 
platform built on Apache Hadoop and Apache Spark. It provides a framework 
process for defining data quality model, executing data quality measurement, 
automating data profiling and validation, as well as a unified data quality 
visualization across multiple data systems.  It tries to address the data 
quality challenges in big data and streaming context.</p>
+<h2 id="Overview-of-Apache-Griffin"><a href="#Overview-of-Apache-Griffin" 
class="headerlink" title="Overview of Apache Griffin"></a>Overview of Apache 
Griffin</h2><p>When people use big data (Hadoop or other streaming systems), 
measurement of data quality is a big challenge. Different teams have built 
customized tools to detect and analyze data quality issues within their own 
domains. As a platform organization, we think of taking a platform approach to 
commonly occurring patterns. As such, we are building a platform to provide 
shared Infrastructure and generic features to solve common data quality pain 
points. This would enable us to build trusted data assets.</p>
+<p>Currently it is very difficult and costly to do data quality validation 
when we have large volumes of related data flowing across multi-platforms 
(streaming and batch). Take eBay’s Real-time Personalization Platform as a 
sample; every day we have to validate the data quality for ~600M records. Data 
quality often becomes one big challenge in this complex environment and massive 
scale.</p>
+<p>We detect the following at eBay:</p>
+<ol>
+<li>Lack of an end-to-end, unified view of data quality from multiple data 
sources to target applications that takes into account the lineage of the data. 
This results in a long time to identify and fix data quality issues.</li>
+<li>Lack of a system to measure data quality in streaming mode through 
self-service. The need is for a system where datasets can be registered, data 
quality models can be defined, data quality can be visualized and monitored 
using a simple tool and teams alerted when an issue is detected.</li>
+<li>Lack of a Shared platform and API Service. Every team should not have to 
apply and manage own hardware and software infrastructure to solve this common 
problem.</li>
+</ol>
+<p>With these in mind, we decided to build Apache Griffin - A data quality 
service that aims to solve the above short-comings.</p>
+<p>Apache Griffin includes:</p>
+<p><strong>Data Quality Model Engine</strong>: Apache Griffin is model driven 
solution, user can choose various data quality dimension to execute his/her 
data quality validation based on selected target data-set or source data-set ( 
as the golden reference data). It has corresponding library supporting it in 
back-end for the following measurement:</p>
+<ul>
+<li>Accuracy - Does data reflect the real-world objects or a verifiable 
source</li>
+<li>Completeness - Is all necessary data present</li>
+<li>Validity -  Are all data values within the data domains specified by the 
business</li>
+<li>Timeliness - Is the data available at the time needed</li>
+<li>Anomaly detection -  Pre-built algorithm functions for the identification 
of items, events or observations which do not conform to an expected pattern or 
other items in a dataset</li>
+<li>Data Profiling - Apply statistical analysis and assessment of data values 
within a dataset for consistency, uniqueness and logic.</li>
+</ul>
+<p><strong>Data Collection Layer</strong>:</p>
+<p>We support two kinds of data sources, batch data and real time data.</p>
+<p>For batch mode, we can collect data source from  our Hadoop platform by 
various data connectors.</p>
+<p>For real time mode, we can connect with messaging system like Kafka to near 
real time analysis.</p>
+<p><strong>Data Process and Storage Layer</strong>:</p>
+<p>For batch analysis, our data quality model will compute data quality 
metrics in our spark cluster based on data source in hadoop.</p>
+<p>For near real time analysis, we consume data from messaging system, then 
our data quality model will compute our real time data quality metrics in our 
spark cluster. For data storage, we use a time series database in our back end to 
fulfill front end request.</p>
+<p><strong>Apache Griffin Service</strong>:</p>
+<p>We have RESTful web services to accomplish all the functionalities of 
Apache Griffin, such as register data-set, create data quality model, publish 
metrics, retrieve metrics, add subscription, etc. So, the developers can 
develop their own user interface based on these web services.</p>
+<h2 id="Main-business-process"><a href="#Main-business-process" 
class="headerlink" title="Main business process"></a>Main business 
process</h2><p><img src="/images/Business_Process.png" alt=""></p>
+<h2 id="Architecture-diagram"><a href="#Architecture-diagram" 
class="headerlink" title="Architecture diagram"></a>Architecture 
diagram</h2><p><img src="/images/arch.png" alt=""></p>
+<h2 id="Tech-stack"><a href="#Tech-stack" class="headerlink" title="Tech 
stack"></a>Tech stack</h2><p><img src="/images/techstack.png" alt=""></p>
+<h2 id="Rationale"><a href="#Rationale" class="headerlink" 
title="Rationale"></a>Rationale</h2><p>The challenge we face at eBay is that 
our data volume is becoming bigger and bigger, systems process become more 
complex, while we do not have a unified data quality solution to ensure the 
trusted data sets which provide confidences on data quality to our data 
consumers.  The key challenges on data quality includes:</p>
+<ol>
+<li>Existing commercial data quality solution cannot address data quality 
lineage among systems, cannot scale out to support fast growing data at 
eBay</li>
+<li>Existing eBay’s domain specific tools take a long time to identify and 
fix poor data quality when data flowed through multiple systems</li>
+<li>Business logic becomes complex, requires data quality system much 
flexible.</li>
+<li>Some data quality issues do have business impact on user experiences, 
revenue, efficiency &amp; compliance.</li>
+<li>Communication overhead of data quality metrics, typically in a big 
organization, which involve different teams.</li>
+</ol>
+<p>The idea of Apache Griffin is to provide Data Quality validation as 
a Service, to allow data engineers and data consumers to have:</p>
+<ul>
+<li>Near real-time understanding of the data quality health of your data 
pipelines with end-to-end monitoring, all in one place.</li>
+<li>Profiling, detecting and correlating issues and providing recommendations 
that drive rapid and focused troubleshooting</li>
+<li>A centralized data quality model management system including rule, 
metadata, scheduler etc.  </li>
+<li>Native code generation to run everywhere, including Hadoop, Kafka, Spark, 
etc.</li>
+<li>One set of tools to build data quality pipelines across all eBay data 
platforms.</li>
+</ul>
+<h2 id="Disclaimer"><a href="#Disclaimer" class="headerlink" 
title="Disclaimer"></a>Disclaimer</h2><p>Apache Griffin is an effort undergoing 
incubation at The Apache Software Foundation (ASF), sponsored by the Apache 
Incubator. Incubation is required of all newly accepted projects until a 
further review indicates that the infrastructure, communications, and decision 
making process have stabilized in a manner consistent with other successful ASF 
projects. While incubation status is not necessarily a reflection of the 
completeness or stability of the code, it does indicate that the project has 
yet to be fully endorsed by the ASF.</p>
+
+      
+    </div>
+    <footer class="article-footer">
+      <a data-url="http://yoursite.com/2018/03/10/home/" 
data-id="cjequ0plq0001c2po1nm44ixq" class="article-share-link">Partager</a>
+      
+      
+    </footer>
+  </div>
+  
+</article>
+
+
+  
     <article id="post-release" class="article article-type-post" itemscope 
itemprop="blogPost">
   <div class="article-meta">
     <a href="/2017/11/07/release/" class="article-date">
@@ -179,92 +264,7 @@
       
     </div>
     <footer class="article-footer">
-      <a data-url="http://yoursite.com/2017/11/07/release/" 
data-id="cjeqth4110003rzpo89pdl2vz" class="article-share-link">Share</a>
-      
-      
-    </footer>
-  </div>
-  
-</article>
-
-
-  
-    <article id="post-home" class="article article-type-post" itemscope 
itemprop="blogPost">
-  <div class="article-meta">
-    <a href="/2017/03/30/home/" class="article-date">
-  <time datetime="2017-03-30T02:49:47.000Z" 
itemprop="datePublished">2017-03-30</time>
-</a>
-    
-  </div>
-  <div class="article-inner">
-    
-    
-      <header class="article-header">
-        
-  
-    <h1 itemprop="name">
-      <a class="article-title" href="/2017/03/30/home/">Apache Griffin</a>
-    </h1>
-  
-
-      </header>
-    
-    <div class="article-entry" itemprop="articleBody">
-      
-        <h2 id="Abstract"><a href="#Abstract" class="headerlink" 
title="Abstract"></a>Abstract</h2><p>Apache Griffin is a Data Quality Service 
platform built on Apache Hadoop and Apache Spark. It provides a framework 
process for defining data quality model, executing data quality measurement, 
automating data profiling and validation, as well as a unified data quality 
visualization across multiple data systems.  It tries to address the data 
quality challenges in big data and streaming context.</p>
-<h2 id="Overview-of-Apache-Griffin"><a href="#Overview-of-Apache-Griffin" 
class="headerlink" title="Overview of Apache Griffin"></a>Overview of Apache 
Griffin</h2><p>When people use big data (Hadoop or other streaming systems), 
measurement of data quality is a big challenge. Different teams have built 
customized tools to detect and analyze data quality issues within their own 
domains. As a platform organization, we think of taking a platform approach to 
commonly occurring patterns. As such, we are building a platform to provide 
shared Infrastructure and generic features to solve common data quality pain 
points. This would enable us to build trusted data assets.</p>
-<p>Currently it is very difficult and costly to do data quality validation 
when we have large volumes of related data flowing across multi-platforms 
(streaming and batch). Take eBay’s Real-time Personalization Platform as a 
sample; Everyday we have to validate the data quality for ~600M records. Data 
quality often becomes one big challenge in this complex environment and massive 
scale.</p>
-<p>We detect the following at eBay:</p>
-<ol>
-<li>Lack of an end-to-end, unified view of data quality from multiple data 
sources to target applications that takes into account the lineage of the data. 
This results in a long time to identify and fix data quality issues.</li>
-<li>Lack of a system to measure data quality in streaming mode through 
self-service. The need is for a system where datasets can be registered, data 
quality models can be defined, data quality can be visualized and monitored 
using a simple tool and teams alerted when an issue is detected.</li>
-<li>Lack of a Shared platform and API Service. Every team should not have to 
apply and manage own hardware and software infrastructure to solve this common 
problem.</li>
-</ol>
-<p>With these in mind, we decided to build Apache Griffin - A data quality 
service that aims to solve the above short-comings.</p>
-<p>Apache Griffin includes:</p>
-<p><strong>Data Quality Model Engine</strong>: Apache Griffin is model driven 
solution, user can choose various data quality dimension to execute his/her 
data quality validation based on selected target data-set or source data-set ( 
as the golden reference data). It has corresponding library supporting it in 
back-end for the following measurement:</p>
-<ul>
-<li>Accuracy - Does data reflect the real-world objects or a verifiable 
source</li>
-<li>Completeness - Is all necessary data present</li>
-<li>Validity -  Are all data values within the data domains specified by the 
business</li>
-<li>Timeliness - Is the data available at the time needed</li>
-<li>Anomaly detection -  Pre-built algorithm functions for the identification 
of items, events or observations which do not conform to an expected pattern or 
other items in a dataset</li>
-<li>Data Profiling - Apply statistical analysis and assessment of data values 
within a dataset for consistency, uniqueness and logic.</li>
-</ul>
-<p><strong>Data Collection Layer</strong>:</p>
-<p>We support two kinds of data sources, batch data and real time data.</p>
-<p>For batch mode, we can collect data source from  our Hadoop platform by 
various data connectors.</p>
-<p>For real time mode, we can connect with messaging system like Kafka to near 
real time analysis.</p>
-<p><strong>Data Process and Storage Layer</strong>:</p>
-<p>For batch analysis, our data quality model will compute data quality 
metrics in our spark cluster based on data source in hadoop.</p>
-<p>For near real time analysis, we consume data from messaging system, then 
our data quality model will compute our real time data quality metrics in our 
spark cluster. for data storage, we use time series database in our back end to 
fulfill front end request.</p>
-<p><strong>Apache Griffin Service</strong>:</p>
-<p>We have RESTful web services to accomplish all the functionalities of 
Apache Griffin, such as register data-set, create data quality model, publish 
metrics, retrieve metrics, add subscription, etc. So, the developers can 
develop their own user interface based on these web serivces.</p>
-<h2 id="Main-business-process"><a href="#Main-business-process" 
class="headerlink" title="Main business process"></a>Main business 
process</h2><p><img src="/images/Business_Process.png" alt=""></p>
-<h2 id="Architecture-diagram"><a href="#Architecture-diagram" 
class="headerlink" title="Architecture diagram"></a>Architecture 
diagram</h2><p><img src="/images/arch.png" alt=""></p>
-<h2 id="Tech-stack"><a href="#Tech-stack" class="headerlink" title="Tech 
stack"></a>Tech stack</h2><p><img src="/images/techstack.png" alt=""></p>
-<h2 id="Rationale"><a href="#Rationale" class="headerlink" 
title="Rationale"></a>Rationale</h2><p>The challenge we face at eBay is that 
our data volume is becoming bigger and bigger, systems process become more 
complex, while we do not have a unified data quality solution to ensure the 
trusted data sets which provide confidences on data quality to our data 
consumers.  The key challenges on data quality includes:</p>
-<ol>
-<li>Existing commercial data quality solution cannot address data quality 
lineage among systems, cannot scale out to support fast growing data at 
eBay</li>
-<li>Existing eBay’s domain specific tools take a long time to identify and 
fix poor data quality when data flowed through multiple systems</li>
-<li>Business logic becomes complex, requires data quality system much 
flexible.</li>
-<li>Some data quality issues do have business impact on user experiences, 
revenue, efficiency &amp; compliance.</li>
-<li>Communication overhead of data quality metrics, typically in a big 
organization, which involve different teams.</li>
-</ol>
-<p>The idea of  Apache Apache Griffin is to provide Data Quality validation as 
a Service, to allow data engineers and data consumers to have:</p>
-<ul>
-<li>Near real-time understanding of the data quality health of your data 
pipelines with end-to-end monitoring, all in one place.</li>
-<li>Profiling, detecting and correlating issues and providing recommendations 
that drive rapid and focused troubleshooting</li>
-<li>A centralized data quality model management system including rule, 
metadata, scheduler etc.  </li>
-<li>Native code generation to run everywhere, including Hadoop, Kafka, Spark, 
etc.</li>
-<li>One set of tools to build data quality pipelines across all eBay data 
platforms.</li>
-</ul>
-<h2 id="Disclaimer"><a href="#Disclaimer" class="headerlink" 
title="Disclaimer"></a>Disclaimer</h2><p>Apache Griffin is an effort undergoing 
incubation at The Apache Software Foundation (ASF), sponsored by the Apache 
Incubator. Incubation is required of all newly accepted projects until a 
further review indicates that the infrastructure, communications, and decision 
making process have stabilized in a manner consistent with other successful ASF 
projects. While incubation status is not necessarily a reflection of the 
completeness or stability of the code, it does indicate that the project has 
yet to be fully endorsed by the ASF.</p>
-
-      
-    </div>
-    <footer class="article-footer">
-      <a data-url="http://yoursite.com/2017/03/30/home/" 
data-id="cjeqth40z0002rzpo41wfk6kb" class="article-share-link">Share</a>
+      <a data-url="http://yoursite.com/2017/11/07/release/" 
data-id="cjequ0plt0003c2po150098k6" class="article-share-link">Partager</a>
       
       
     </footer>
@@ -332,7 +332,7 @@
       
     </div>
     <footer class="article-footer">
-      <a data-url="http://yoursite.com/2017/03/04/community/" 
data-id="cjeqth40t0000rzpofzb69p9l" class="article-share-link">Share</a>
+      <a data-url="http://yoursite.com/2017/03/04/community/" 
data-id="cjequ0pls0002c2pocjijfgio" class="article-share-link">Partager</a>
       
       
     </footer>
@@ -461,7 +461,7 @@
       
     </div>
     <footer class="article-footer">
-      <a data-url="http://yoursite.com/2017/03/03/plan/" 
data-id="cjeqth40x0001rzpomekh1njt" class="article-share-link">Share</a>
+      <a data-url="http://yoursite.com/2017/03/03/plan/" 
data-id="cjequ0plm0000c2ponumo1lae" class="article-share-link">Partager</a>
       
       
     </footer>
@@ -483,11 +483,11 @@
       <ul>
         
           <li>
-            <a href="/2017/11/07/release/">Release</a>
+            <a href="/2018/03/10/home/">Apache Griffin</a>
           </li>
         
           <li>
-            <a href="/2017/03/30/home/">Apache Griffin</a>
+            <a href="/2017/11/07/release/">Release</a>
           </li>
         
           <li>
@@ -517,7 +517,7 @@
   <div class="outer">
     <div id="footer-info" class="inner">
       &copy; 2018
-      Powered by <a href="http://hexo.io/" target="_blank">Hexo</a>
+      Propulsé by <a href="http://hexo.io/" target="_blank">Hexo</a>
     </div>
   </div>
 </footer>

Reply via email to