incubator-griffin-site git commit: how to build this website

guoyp Mon, 20 Mar 2017 14:24:43 -0700

Repository: incubator-griffin-site
Updated Branches:
  refs/heads/master c8612f14b -> b4142cf0e



how to build this website


Project: http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/commit/b4142cf0
Tree: 
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/tree/b4142cf0
Diff: 
http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/diff/b4142cf0

Branch: refs/heads/master
Commit: b4142cf0ea2df51cf68f4857eff2e17299212aa0
Parents: c8612f1
Author: William Guo <[email protected]>
Authored: Mon Mar 20 14:23:46 2017 -0700
Committer: William Guo <[email protected]>
Committed: Mon Mar 20 14:23:46 2017 -0700

----------------------------------------------------------------------
 .gitignore |  2 ++
 db.json    |  2 +-
 readme.md  | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 34 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/b4142cf0/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index f116f3e..48fa24c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,4 @@
 public
 node_modules/*
+.deploy_git/*
+

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/b4142cf0/db.json
----------------------------------------------------------------------
diff --git a/db.json b/db.json
index 01a5163..d4b1d64 100644
--- a/db.json
+++ b/db.json
@@ -1 +1 @@
-{"meta":{"version":1,"warehouse":"2.2.0"},"models":{"Asset":[{"_id":"themes/landscape/source/css/style.styl","path":"css/style.styl","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/fancybox_loading.gif","path":"fancybox/fancybox_loading.gif","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/blank.gif","path":"fancybox/blank.gif","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/[email protected]","path":"fancybox/[email protected]","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/fancybox_overlay.png","path":"fancybox/fancybox_overlay.png","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/fancybox_sprite.png","path":"fancybox/fancybox_sprite.png","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/[email protected]","path":"fancybox/[email protected]","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/jquery.fancybox.css","
 
path":"fancybox/jquery.fancybox.css","modified":0,"renderable":1},{"_id":"themes/landscape/source/js/script.js","path":"js/script.js","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/jquery.fancybox.pack.js","path":"fancybox/jquery.fancybox.pack.js","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/jquery.fancybox.js","path":"fancybox/jquery.fancybox.js","modified":0,"renderable":1},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.eot","path":"css/fonts/fontawesome-webfont.eot","modified":0,"renderable":1},{"_id":"themes/landscape/source/css/fonts/FontAwesome.otf","path":"css/fonts/FontAwesome.otf","modified":0,"renderable":1},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.woff","path":"css/fonts/fontawesome-webfont.woff","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/helpers/fancybox_buttons.png","path":"fancybox/helpers/fancybox_buttons.png","modified":0,"renderable":1},{"_id":"themes/lands
 
cape/source/fancybox/helpers/jquery.fancybox-buttons.css","path":"fancybox/helpers/jquery.fancybox-buttons.css","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-buttons.js","path":"fancybox/helpers/jquery.fancybox-buttons.js","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-media.js","path":"fancybox/helpers/jquery.fancybox-media.js","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-thumbs.css","path":"fancybox/helpers/jquery.fancybox-thumbs.css","modified":0,"renderable":1},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-thumbs.js","path":"fancybox/helpers/jquery.fancybox-thumbs.js","modified":0,"renderable":1},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.ttf","path":"css/fonts/fontawesome-webfont.ttf","modified":0,"renderable":1},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.svg","path":"css/fonts/fo
 
ntawesome-webfont.svg","modified":0,"renderable":1},{"_id":"themes/landscape/source/css/images/banner.jpg","path":"css/images/banner.jpg","modified":0,"renderable":1},{"_id":"source/images/Business_Process.png","path":"images/Business_Process.png","modified":0,"renderable":0}],"Cache":[{"_id":"themes/landscape/.gitignore","hash":"58d26d4b5f2f94c2d02a4e4a448088e4a2527c77","modified":1490040584000},{"_id":"themes/landscape/Gruntfile.js","hash":"71adaeaac1f3cc56e36c49d549b8d8a72235c9b9","modified":1490040584000},{"_id":"themes/landscape/LICENSE","hash":"c480fce396b23997ee23cc535518ffaaf7f458f8","modified":1490040584000},{"_id":"themes/landscape/README.md","hash":"c7e83cfe8f2c724fc9cac32bd71bb5faf9ceeddb","modified":1490040584000},{"_id":"themes/landscape/_config.yml","hash":"fb8c98a0f6ff9f962637f329c22699721854cd73","modified":1490040584000},{"_id":"themes/landscape/package.json","hash":"85358dc34311c6662e841584e206a4679183943f","modified":1490040584000},{"_id":"source/_posts/hello-wor
 
ld.md","hash":"0637e7741a9bb9db8b8c77a06dc6d55753762546","modified":1490042358000},{"_id":"themes/landscape/languages/default.yml","hash":"3083f319b352d21d80fc5e20113ddf27889c9d11","modified":1490040584000},{"_id":"themes/landscape/languages/fr.yml","hash":"84ab164b37c6abf625473e9a0c18f6f815dd5fd9","modified":1490040584000},{"_id":"themes/landscape/languages/nl.yml","hash":"12ed59faba1fc4e8cdd1d42ab55ef518dde8039c","modified":1490040584000},{"_id":"themes/landscape/languages/no.yml","hash":"965a171e70347215ec726952e63f5b47930931ef","modified":1490040584000},{"_id":"themes/landscape/languages/ru.yml","hash":"4fda301bbd8b39f2c714e2c934eccc4b27c0a2b0","modified":1490040584000},{"_id":"themes/landscape/languages/zh-CN.yml","hash":"ca40697097ab0b3672a80b455d3f4081292d1eed","modified":1490040584000},{"_id":"themes/landscape/languages/zh-TW.yml","hash":"53ce3000c5f767759c7d2c4efcaa9049788599c3","modified":1490040584000},{"_id":"themes/landscape/layout/archive.ejs","hash":"2703b07cc8ac64ae4
 
6d1d263f4653013c7e1666b","modified":1490040584000},{"_id":"themes/landscape/layout/category.ejs","hash":"765426a9c8236828dc34759e604cc2c52292835a","modified":1490040584000},{"_id":"themes/landscape/layout/index.ejs","hash":"aa1b4456907bdb43e629be3931547e2d29ac58c8","modified":1490040584000},{"_id":"themes/landscape/layout/layout.ejs","hash":"f155824ca6130080bb057fa3e868a743c69c4cf5","modified":1490040584000},{"_id":"themes/landscape/layout/page.ejs","hash":"7d80e4e36b14d30a7cd2ac1f61376d9ebf264e8b","modified":1490040584000},{"_id":"themes/landscape/layout/post.ejs","hash":"7d80e4e36b14d30a7cd2ac1f61376d9ebf264e8b","modified":1490040584000},{"_id":"themes/landscape/layout/tag.ejs","hash":"eaa7b4ccb2ca7befb90142e4e68995fb1ea68b2e","modified":1490040584000},{"_id":"themes/landscape/scripts/fancybox.js","hash":"aa411cd072399df1ddc8e2181a3204678a5177d9","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/after-footer.ejs","hash":"82a30f81c0e8ba4a8af17acd6cc99e93834e4d5e","
 
modified":1490040584000},{"_id":"themes/landscape/layout/_partial/archive.ejs","hash":"931aaaffa0910a48199388ede576184ff15793ee","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/archive-post.ejs","hash":"c7a71425a946d05414c069ec91811b5c09a92c47","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/article.ejs","hash":"c4c835615d96a950d51fa2c3b5d64d0596534fed","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/footer.ejs","hash":"93518893cf91287e797ebac543c560e2a63b8d0e","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/google-analytics.ejs","hash":"f921e7f9223d7c95165e0f835f353b2938e40c45","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/head.ejs","hash":"4fe8853e864d192701c03e5cd3a5390287b90612","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/header.ejs","hash":"c21ca56f419d01a9f49c27b6be9f4a98402b2aa3","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/mobile-nav.e
 
js","hash":"e952a532dfc583930a666b9d4479c32d4a84b44e","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/sidebar.ejs","hash":"930da35cc2d447a92e5ee8f835735e6fd2232469","modified":1490040584000},{"_id":"themes/landscape/layout/_widget/archive.ejs","hash":"beb4a86fcc82a9bdda9289b59db5a1988918bec3","modified":1490040584000},{"_id":"themes/landscape/layout/_widget/category.ejs","hash":"dd1e5af3c6af3f5d6c85dfd5ca1766faed6a0b05","modified":1490040584000},{"_id":"themes/landscape/layout/_widget/recent_posts.ejs","hash":"0d4f064733f8b9e45c0ce131fe4a689d570c883a","modified":1490040584000},{"_id":"themes/landscape/layout/_widget/tagcloud.ejs","hash":"b4a2079101643f63993dcdb32925c9b071763b46","modified":1490040584000},{"_id":"themes/landscape/layout/_widget/tag.ejs","hash":"2de380865df9ab5f577f7d3bcadf44261eb5faae","modified":1490040584000},{"_id":"themes/landscape/source/css/_extend.styl","hash":"222fbe6d222531d61c1ef0f868c90f747b1c2ced","modified":1490040584000},{"_id":"theme
 
s/landscape/source/css/_variables.styl","hash":"5e37a6571caf87149af83ac1cc0cdef99f117350","modified":1490040584000},{"_id":"themes/landscape/source/css/style.styl","hash":"a70d9c44dac348d742702f6ba87e5bb3084d65db","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/fancybox_loading.gif","hash":"1a755fb2599f3a313cc6cfdb14df043f8c14a99c","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/blank.gif","hash":"2daeaa8b5f19f0bc209d976c02bd6acb51b00b0a","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/[email protected]","hash":"273b123496a42ba45c3416adb027cd99745058b0","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/fancybox_overlay.png","hash":"b3a4ee645ba494f52840ef8412015ba0f465dbe0","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/fancybox_sprite.png","hash":"17df19f97628e77be09c352bf27425faea248251","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/[email protected]","hash"
 
:"30c58913f327e28f466a00f4c1ac8001b560aed8","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/jquery.fancybox.css","hash":"aaa582fb9eb4b7092dc69fcb2d5b1c20cca58ab6","modified":1490040584000},{"_id":"themes/landscape/source/js/script.js","hash":"2876e0b19ce557fca38d7c6f49ca55922ab666a1","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/jquery.fancybox.pack.js","hash":"9e0d51ca1dbe66f6c0c7aefd552dc8122e694a6e","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/jquery.fancybox.js","hash":"d08b03a42d5c4ba456ef8ba33116fdbb7a9cabed","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/post/category.ejs","hash":"c6bcd0e04271ffca81da25bcff5adf3d46f02fc0","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/post/date.ejs","hash":"6197802873157656e3077c5099a7dda3d3b01c29","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/post/gallery.ejs","hash":"3d9d81a3c693ff2378ef06ddb6810254e509de5b","modified":
 
1490040584000},{"_id":"themes/landscape/layout/_partial/post/nav.ejs","hash":"16a904de7bceccbb36b4267565f2215704db2880","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/post/tag.ejs","hash":"2fcb0bf9c8847a644167a27824c9bb19ac74dd14","modified":1490040584000},{"_id":"themes/landscape/layout/_partial/post/title.ejs","hash":"2f275739b6f1193c123646a5a31f37d48644c667","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/archive.styl","hash":"db15f5677dc68f1730e82190bab69c24611ca292","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/article.styl","hash":"10685f8787a79f79c9a26c2f943253450c498e3e","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/comment.styl","hash":"79d280d8d203abb3bd933ca9b8e38c78ec684987","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/footer.styl","hash":"e35a060b8512031048919709a8e7b1ec0e40bc1b","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/hea
 
der.styl","hash":"85ab11e082f4dd86dde72bed653d57ec5381f30c","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/highlight.styl","hash":"bf4e7be1968dad495b04e83c95eac14c4d0ad7c0","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/mobile.styl","hash":"a399cf9e1e1cec3e4269066e2948d7ae5854d745","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/sidebar-aside.styl","hash":"890349df5145abf46ce7712010c89237900b3713","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/sidebar-bottom.styl","hash":"8fd4f30d319542babfd31f087ddbac550f000a8a","modified":1490040584000},{"_id":"themes/landscape/source/css/_partial/sidebar.styl","hash":"404ec059dc674a48b9ab89cd83f258dec4dcb24d","modified":1490040584000},{"_id":"themes/landscape/source/css/_util/grid.styl","hash":"0bf55ee5d09f193e249083602ac5fcdb1e571aed","modified":1490040584000},{"_id":"themes/landscape/source/css/_util/mixin.styl","hash":"44f32767d9fd3c1c08a60d91f181ee5
 
3c8f0dbb3","modified":1490040584000},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.eot","hash":"7619748fe34c64fb157a57f6d4ef3678f63a8f5e","modified":1490040584000},{"_id":"themes/landscape/source/css/fonts/FontAwesome.otf","hash":"b5b4f9be85f91f10799e87a083da1d050f842734","modified":1490040584000},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.woff","hash":"04c3bf56d87a0828935bd6b4aee859995f321693","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/helpers/fancybox_buttons.png","hash":"e385b139516c6813dcd64b8fc431c364ceafe5f3","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-buttons.css","hash":"1a9d8e5c22b371fcc69d4dbbb823d9c39f04c0c8","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-buttons.js","hash":"dc3645529a4bf72983a39fa34c1eb9146e082019","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-media.js","hash":"2
 
94420f9ff20f4e3584d212b0c262a00a96ecdb3","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-thumbs.css","hash":"4ac329c16a5277592fc12a37cca3d72ca4ec292f","modified":1490040584000},{"_id":"themes/landscape/source/fancybox/helpers/jquery.fancybox-thumbs.js","hash":"47da1ae5401c24b5c17cc18e2730780f5c1a7a0c","modified":1490040584000},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.ttf","hash":"7f09c97f333917034ad08fa7295e916c9f72fd3f","modified":1490040584000},{"_id":"themes/landscape/source/css/fonts/fontawesome-webfont.svg","hash":"46fcc0194d75a0ddac0a038aee41b23456784814","modified":1490040584000},{"_id":"themes/landscape/source/css/images/banner.jpg","hash":"f44aa591089fcb3ec79770a1e102fd3289a7c6a6","modified":1490040584000},{"_id":"public/2017/03/20/hello-world/index.html","hash":"763f2d23f38c98ef40d246acee4c5fb1e4b90f03","modified":1490042215015},{"_id":"public/archives/index.html","hash":"144262bd43b07b6c968651e9dfa0f9e281fe6d
 
1a","modified":1490042067730},{"_id":"public/archives/2017/index.html","hash":"c3c977357060f220a6e698d02fc2ccac45384d14","modified":1490042067732},{"_id":"public/index.html","hash":"45e436e11b9a07916c9bd37e42e2eb75b155df14","modified":1490042067732},{"_id":"public/archives/2017/03/index.html","hash":"ccd23b71511266d975544d19fda445d15979d458","modified":1490042067732},{"_id":"source/images/Business_Process.png","hash":"07776b4ec09c3ca286f1d0d1537cd89d3c053dff","modified":1489114942000},{"_id":"public/images/Business_Process.png","hash":"07776b4ec09c3ca286f1d0d1537cd89d3c053dff","modified":1490042215016}],"Category":[],"Data":[],"Page":[],"Post":[{"title":"Apache
 Griffin","_content":"\n## Abstract\nApache Griffin is a Data Quality Service 
platform built on Apache Hadoop and Apache Spark. It provides a framework 
process for defining data quality model, executing data quality measurement, 
automating data profiling and validation, as well as a unified data quality 
visualization across mu
 ltiple data systems.  It tries to address the data quality challenges in big 
data and streaming context.\n\n\n## Overview of Apache Griffin  \nAt eBay, when 
people use big data (Hadoop or other streaming systems), measurement of data 
quality is a big challenge. Different teams have built customized tools to 
detect and analyze data quality issues within their own domains. As a platform 
organization, we think of taking a platform approach to commonly occurring 
patterns. As such, we are building a platform to provide shared Infrastructure 
and generic features to solve common data quality pain points. This would 
enable us to build trusted data assets.\n\nCurrently it is very difficult and 
costly to do data quality validation when we have large volumes of related data 
flowing across multi-platforms (streaming and batch). Take eBay's Real-time 
Personalization Platform as a sample; Everyday we have to validate the data 
quality for ~600M records. Data quality often becomes one big challenge
  in this complex environment and massive scale.\n\nWe detect the following at 
eBay:\n\n1. Lack of an end-to-end, unified view of data quality from multiple 
data sources to target applications that takes into account the lineage of the 
data. This results in a long time to identify and fix data quality issues.\n2. 
Lack of a system to measure data quality in streaming mode through 
self-service. The need is for a system where datasets can be registered, data 
quality models can be defined, data quality can be visualized and monitored 
using a simple tool and teams alerted when an issue is detected.\n3. Lack of a 
Shared platform and API Service. Every team should not have to apply and manage 
own hardware and software infrastructure to solve this common problem.\n\nWith 
these in mind, we decided to build Apache Griffin - A data quality service that 
aims to solve the above short-comings.\n\nApache Griffin includes:\n\n**Data 
Quality Model Engine**: Apache Griffin is model driven solution, us
 er can choose various data quality dimension to execute his/her data quality 
validation based on selected target data-set or source data-set ( as the golden 
reference data). It has corresponding library supporting it in back-end for the 
following measurement:\n\n - Accuracy - Does data reflect the real-world 
objects or a verifiable source\n - Completeness - Is all necessary data 
present\n - Validity -  Are all data values within the data domains specified 
by the business\n - Timeliness - Is the data available at the time needed\n - 
Anomaly detection -  Pre-built algorithm functions for the identification of 
items, events or observations which do not conform to an expected pattern or 
other items in a dataset\n - Data Profiling - Apply statistical analysis and 
assessment of data values within a dataset for consistency, uniqueness and 
logic.\n\n**Data Collection Layer**:\n\nWe support two kinds of data sources, 
batch data and real time data.\n\nFor batch mode, we can collect data sourc
 e from  our Hadoop platform by various data connectors.\n\nFor real time mode, 
we can connect with messaging system like Kafka to near real time 
analysis.\n\n**Data Process and Storage Layer**:\n\nFor batch analysis, our 
data quality model will compute data quality metrics in our spark cluster based 
on data source in hadoop.\n\nFor near real time analysis, we consume data from 
messaging system, then our data quality model will compute our real time data 
quality metrics in our spark cluster. for data storage, we use time series 
database in our back end to fulfill front end request.\n\n**Apache Griffin 
Service**:\n\nWe have RESTful web services to accomplish all the 
functionalities of Apache Griffin, such as register data-set, create data 
quality model, publish metrics, retrieve metrics, add subscription, etc. So, 
the developers can develop their own user interface based on these web 
serivces.\n\n## Main business process\nHere's the business process 
diagram\n\n![](/images/Business_Pro
 cess.png)\n\n## Rationale\nThe challenge we face at eBay is that our data 
volume is becoming bigger and bigger, systems process become more complex, 
while we do not have a unified data quality solution to ensure the trusted data 
sets which provide confidences on data quality to our data consumers.  The key 
challenges on data quality includes:\n\n1. Existing commercial data quality 
solution cannot address data quality lineage among systems, cannot scale out to 
support fast growing data at eBay\n2. Existing eBay's domain specific tools 
take a long time to identify and fix poor data quality when data flowed through 
multiple systems\n3. Business logic becomes complex, requires data quality 
system much flexible.\n4. Some data quality issues do have business impact on 
user experiences, revenue, efficiency & compliance.\n5. Communication overhead 
of data quality metrics, typically in a big organization, which involve 
different teams.\n\nThe idea of  Apache Apache Griffin is to provide Data
  Quality validation as a Service, to allow data engineers and data consumers 
to have:\n\n - Near real-time understanding of the data quality health of your 
data pipelines with end-to-end monitoring, all in one place.\n - Profiling, 
detecting and correlating issues and providing recommendations that drive rapid 
and focused troubleshooting\n - A centralized data quality model management 
system including rule, metadata, scheduler etc.  \n - Native code generation to 
run everywhere, including Hadoop, Kafka, Spark, etc.\n - One set of tools to 
build data quality pipelines across all eBay data 
platforms.\n","source":"_posts/hello-world.md","raw":"---\ntitle: Apache 
Griffin\n---\n\n## Abstract\nApache Griffin is a Data Quality Service platform 
built on Apache Hadoop and Apache Spark. It provides a framework process for 
defining data quality model, executing data quality measurement, automating 
data profiling and validation, as well as a unified data quality visualization 
across multiple da
 ta systems.  It tries to address the data quality challenges in big data and 
streaming context.\n\n\n## Overview of Apache Griffin  \nAt eBay, when people 
use big data (Hadoop or other streaming systems), measurement of data quality 
is a big challenge. Different teams have built customized tools to detect and 
analyze data quality issues within their own domains. As a platform 
organization, we think of taking a platform approach to commonly occurring 
patterns. As such, we are building a platform to provide shared Infrastructure 
and generic features to solve common data quality pain points. This would 
enable us to build trusted data assets.\n\nCurrently it is very difficult and 
costly to do data quality validation when we have large volumes of related data 
flowing across multi-platforms (streaming and batch). Take eBay's Real-time 
Personalization Platform as a sample; Everyday we have to validate the data 
quality for ~600M records. Data quality often becomes one big challenge in this 
 complex environment and massive scale.\n\nWe detect the following at 
eBay:\n\n1. Lack of an end-to-end, unified view of data quality from multiple 
data sources to target applications that takes into account the lineage of the 
data. This results in a long time to identify and fix data quality issues.\n2. 
Lack of a system to measure data quality in streaming mode through 
self-service. The need is for a system where datasets can be registered, data 
quality models can be defined, data quality can be visualized and monitored 
using a simple tool and teams alerted when an issue is detected.\n3. Lack of a 
Shared platform and API Service. Every team should not have to apply and manage 
own hardware and software infrastructure to solve this common problem.\n\nWith 
these in mind, we decided to build Apache Griffin - A data quality service that 
aims to solve the above short-comings.\n\nApache Griffin includes:\n\n**Data 
Quality Model Engine**: Apache Griffin is model driven solution, user can ch
 oose various data quality dimension to execute his/her data quality validation 
based on selected target data-set or source data-set ( as the golden reference 
data). It has corresponding library supporting it in back-end for the following 
measurement:\n\n - Accuracy - Does data reflect the real-world objects or a 
verifiable source\n - Completeness - Is all necessary data present\n - Validity 
-  Are all data values within the data domains specified by the business\n - 
Timeliness - Is the data available at the time needed\n - Anomaly detection -  
Pre-built algorithm functions for the identification of items, events or 
observations which do not conform to an expected pattern or other items in a 
dataset\n - Data Profiling - Apply statistical analysis and assessment of data 
values within a dataset for consistency, uniqueness and logic.\n\n**Data 
Collection Layer**:\n\nWe support two kinds of data sources, batch data and 
real time data.\n\nFor batch mode, we can collect data source from  o
 ur Hadoop platform by various data connectors.\n\nFor real time mode, we can 
connect with messaging system like Kafka to near real time analysis.\n\n**Data 
Process and Storage Layer**:\n\nFor batch analysis, our data quality model will 
compute data quality metrics in our spark cluster based on data source in 
hadoop.\n\nFor near real time analysis, we consume data from messaging system, 
then our data quality model will compute our real time data quality metrics in 
our spark cluster. for data storage, we use time series database in our back 
end to fulfill front end request.\n\n**Apache Griffin Service**:\n\nWe have 
RESTful web services to accomplish all the functionalities of Apache Griffin, 
such as register data-set, create data quality model, publish metrics, retrieve 
metrics, add subscription, etc. So, the developers can develop their own user 
interface based on these web services.\n\n## Main business process\nHere's the 
business process diagram\n\n![](/images/Business_Process.png)
 \n\n## Rationale\nThe challenge we face at eBay is that our data volume is 
becoming bigger and bigger, systems process become more complex, while we do 
not have a unified data quality solution to ensure the trusted data sets which 
provide confidences on data quality to our data consumers.  The key challenges 
on data quality includes:\n\n1. Existing commercial data quality solution 
cannot address data quality lineage among systems, cannot scale out to support 
fast growing data at eBay\n2. Existing eBay's domain specific tools take a long 
time to identify and fix poor data quality when data flowed through multiple 
systems\n3. Business logic becomes complex, requires data quality system much 
flexible.\n4. Some data quality issues do have business impact on user 
experiences, revenue, efficiency & compliance.\n5. Communication overhead of 
data quality metrics, typically in a big organization, which involve different 
teams.\n\nThe idea of Apache Griffin is to provide Data Quality 
 validation as a Service, to allow data engineers and data consumers to 
have:\n\n - Near real-time understanding of the data quality health of your 
data pipelines with end-to-end monitoring, all in one place.\n - Profiling, 
detecting and correlating issues and providing recommendations that drive rapid 
and focused troubleshooting\n - A centralized data quality model management 
system including rule, metadata, scheduler etc.  \n - Native code generation to 
run everywhere, including Hadoop, Kafka, Spark, etc.\n - One set of tools to 
build data quality pipelines across all eBay data 
platforms.\n","slug":"hello-world","published":1,"date":"2017-03-20T20:09:44.000Z","updated":"2017-03-20T20:39:18.000Z","_id":"cj0ikfzhw0000pgpouj7009vf","comments":1,"layout":"post","photos":[],"link":"","content":"<h2
 id=\"Abstract\"><a href=\"#Abstract\" class=\"headerlink\" 
title=\"Abstract\"></a>Abstract</h2><p>Apache Griffin is a Data Quality Service 
platform built on Apache Hadoop and Apache Spark. It
  provides a framework process for defining data quality model, executing data 
quality measurement, automating data profiling and validation, as well as a 
unified data quality visualization across multiple data systems.  It tries to 
address the data quality challenges in big data and streaming context.</p>\n<h2 
id=\"Overview-of-Apache-Griffin\"><a href=\"#Overview-of-Apache-Griffin\" 
class=\"headerlink\" title=\"Overview of Apache Griffin\"></a>Overview of 
Apache Griffin</h2><p>At eBay, when people use big data (Hadoop or other 
streaming systems), measurement of data quality is a big challenge. Different 
teams have built customized tools to detect and analyze data quality issues 
within their own domains. As a platform organization, we think of taking a 
platform approach to commonly occurring patterns. As such, we are building a 
platform to provide shared Infrastructure and generic features to solve common 
data quality pain points. This would enable us to build trusted data assets.</p
 >\n<p>Currently it is very difficult and costly to do data quality validation 
 >when we have large volumes of related data flowing across multi-platforms 
 >(streaming and batch). Take eBay’s Real-time Personalization Platform as a 
 >sample; Everyday we have to validate the data quality for ~600M records. Data 
 >quality often becomes one big challenge in this complex environment and 
 >massive scale.</p>\n<p>We detect the following at eBay:</p>\n<ol>\n<li>Lack 
 >of an end-to-end, unified view of data quality from multiple data sources to 
 >target applications that takes into account the lineage of the data. This 
 >results in a long time to identify and fix data quality 
 >issues.</li>\n<li>Lack of a system to measure data quality in streaming mode 
 >through self-service. The need is for a system where datasets can be 
 >registered, data quality models can be defined, data quality can be 
 >visualized and monitored using a simple tool and teams alerted when an issue 
 >is detected.</li>\n<li>Lack of a Shared plat
 form and API Service. Every team should not have to apply and manage own 
hardware and software infrastructure to solve this common 
problem.</li>\n</ol>\n<p>With these in mind, we decided to build Apache Griffin 
- A data quality service that aims to solve the above 
short-comings.</p>\n<p>Apache Griffin includes:</p>\n<p><strong>Data Quality 
Model Engine</strong>: Apache Griffin is model driven solution, user can choose 
various data quality dimension to execute his/her data quality validation based 
on selected target data-set or source data-set ( as the golden reference data). 
It has corresponding library supporting it in back-end for the following 
measurement:</p>\n<ul>\n<li>Accuracy - Does data reflect the real-world objects 
or a verifiable source</li>\n<li>Completeness - Is all necessary data 
present</li>\n<li>Validity -  Are all data values within the data domains 
specified by the business</li>\n<li>Timeliness - Is the data available at the 
time needed</li>\n<li>Anomaly detection 
 -  Pre-built algorithm functions for the identification of items, events or 
observations which do not conform to an expected pattern or other items in a 
dataset</li>\n<li>Data Profiling - Apply statistical analysis and assessment of 
data values within a dataset for consistency, uniqueness and 
logic.</li>\n</ul>\n<p><strong>Data Collection Layer</strong>:</p>\n<p>We 
support two kinds of data sources, batch data and real time data.</p>\n<p>For 
batch mode, we can collect data source from  our Hadoop platform by various 
data connectors.</p>\n<p>For real time mode, we can connect with messaging 
system like Kafka to near real time analysis.</p>\n<p><strong>Data Process and 
Storage Layer</strong>:</p>\n<p>For batch analysis, our data quality model will 
compute data quality metrics in our spark cluster based on data source in 
hadoop.</p>\n<p>For near real time analysis, we consume data from messaging 
system, then our data quality model will compute our real time data quality 
metrics in our 
 spark cluster. for data storage, we use time series database in our back end 
to fulfill front end request.</p>\n<p><strong>Apache Griffin 
Service</strong>:</p>\n<p>We have RESTful web services to accomplish all the 
functionalities of Apache Griffin, such as register data-set, create data 
quality model, publish metrics, retrieve metrics, add subscription, etc. So, 
the developers can develop their own user interface based on these web 
services.</p>\n<h2 id=\"Main-business-process\"><a 
href=\"#Main-business-process\" class=\"headerlink\" title=\"Main business 
process\"></a>Main business process</h2><p>Here’s the business process 
diagram</p>\n<p><img src=\"/images/Business_Process.png\" alt=\"\"></p>\n<h2 
id=\"Rationale\"><a href=\"#Rationale\" class=\"headerlink\" 
title=\"Rationale\"></a>Rationale</h2><p>The challenge we face at eBay is that 
our data volume is becoming bigger and bigger, systems process become more 
complex, while we do not have a unified data quality solution to ensu
 re the trusted data sets which provide confidences on data quality to our data 
consumers.  The key challenges on data quality 
includes:</p>\n<ol>\n<li>Existing commercial data quality solution cannot 
address data quality lineage among systems, cannot scale out to support fast 
growing data at eBay</li>\n<li>Existing eBay’s domain specific tools take a 
long time to identify and fix poor data quality when data flowed through 
multiple systems</li>\n<li>Business logic becomes complex, requires data 
quality system much flexible.</li>\n<li>Some data quality issues do have 
business impact on user experiences, revenue, efficiency &amp; 
compliance.</li>\n<li>Communication overhead of data quality metrics, typically 
in a big organization, which involve different teams.</li>\n</ol>\n<p>The idea 
of Apache Griffin is to provide Data Quality validation as a Service, 
to allow data engineers and data consumers to have:</p>\n<ul>\n<li>Near 
real-time understanding of the data quality health 
 of your data pipelines with end-to-end monitoring, all in one 
place.</li>\n<li>Profiling, detecting and correlating issues and providing 
recommendations that drive rapid and focused troubleshooting</li>\n<li>A 
centralized data quality model management system including rule, metadata, 
scheduler etc.  </li>\n<li>Native code generation to run everywhere, including 
Hadoop, Kafka, Spark, etc.</li>\n<li>One set of tools to build data quality 
pipelines across all eBay data 
platforms.</li>\n</ul>\n","excerpt":"","more":"<h2 id=\"Abstract\"><a 
href=\"#Abstract\" class=\"headerlink\" 
title=\"Abstract\"></a>Abstract</h2><p>Apache Griffin is a Data Quality Service 
platform built on Apache Hadoop and Apache Spark. It provides a framework 
process for defining data quality model, executing data quality measurement, 
automating data profiling and validation, as well as a unified data quality 
visualization across multiple data systems.  It tries to address the data 
quality challenges in big data and 
 streaming context.</p>\n<h2 id=\"Overview-of-Apache-Griffin\"><a 
href=\"#Overview-of-Apache-Griffin\" class=\"headerlink\" title=\"Overview of 
Apache Griffin\"></a>Overview of Apache Griffin</h2><p>At eBay, when people use 
big data (Hadoop or other streaming systems), measurement of data quality is a 
big challenge. Different teams have built customized tools to detect and 
analyze data quality issues within their own domains. As a platform 
organization, we think of taking a platform approach to commonly occurring 
patterns. As such, we are building a platform to provide shared Infrastructure 
and generic features to solve common data quality pain points. This would 
enable us to build trusted data assets.</p>\n<p>Currently it is very difficult 
and costly to do data quality validation when we have large volumes of related 
data flowing across multi-platforms (streaming and batch). Take eBay’s 
Real-time Personalization Platform as a sample; Everyday we have to validate 
the data quality f
 or ~600M records. Data quality often becomes one big challenge in this complex 
environment and massive scale.</p>\n<p>We detect the following at 
eBay:</p>\n<ol>\n<li>Lack of an end-to-end, unified view of data quality from 
multiple data sources to target applications that takes into account the 
lineage of the data. This results in a long time to identify and fix data 
quality issues.</li>\n<li>Lack of a system to measure data quality in streaming 
mode through self-service. The need is for a system where datasets can be 
registered, data quality models can be defined, data quality can be visualized 
and monitored using a simple tool and teams alerted when an issue is 
detected.</li>\n<li>Lack of a Shared platform and API Service. Every team 
should not have to apply and manage own hardware and software infrastructure to 
solve this common problem.</li>\n</ol>\n<p>With these in mind, we decided to 
build Apache Griffin - A data quality service that aims to solve the above 
short-comings.</p>\
 n<p>Apache Griffin includes:</p>\n<p><strong>Data Quality Model 
Engine</strong>: Apache Griffin is model driven solution, user can choose 
various data quality dimension to execute his/her data quality validation based 
on selected target data-set or source data-set ( as the golden reference data). 
It has corresponding library supporting it in back-end for the following 
measurement:</p>\n<ul>\n<li>Accuracy - Does data reflect the real-world objects 
or a verifiable source</li>\n<li>Completeness - Is all necessary data 
present</li>\n<li>Validity -  Are all data values within the data domains 
specified by the business</li>\n<li>Timeliness - Is the data available at the 
time needed</li>\n<li>Anomaly detection -  Pre-built algorithm functions for 
the identification of items, events or observations which do not conform to an 
expected pattern or other items in a dataset</li>\n<li>Data Profiling - Apply 
statistical analysis and assessment of data values within a dataset for 
consistency, uniqu
 eness and logic.</li>\n</ul>\n<p><strong>Data Collection 
Layer</strong>:</p>\n<p>We support two kinds of data sources, batch data and 
real time data.</p>\n<p>For batch mode, we can collect data source from  our 
Hadoop platform by various data connectors.</p>\n<p>For real time mode, we can 
connect with messaging system like Kafka to near real time 
analysis.</p>\n<p><strong>Data Process and Storage Layer</strong>:</p>\n<p>For 
batch analysis, our data quality model will compute data quality metrics in our 
spark cluster based on data source in hadoop.</p>\n<p>For near real time 
analysis, we consume data from messaging system, then our data quality model 
will compute our real time data quality metrics in our spark cluster. for data 
storage, we use time series database in our back end to fulfill front end 
request.</p>\n<p><strong>Apache Griffin Service</strong>:</p>\n<p>We have 
RESTful web services to accomplish all the functionalities of Apache Griffin, 
such as register data-set, create 
 data quality model, publish metrics, retrieve metrics, add subscription, etc. 
So, the developers can develop their own user interface based on these web 
services.</p>\n<h2 id=\"Main-business-process\"><a 
href=\"#Main-business-process\" class=\"headerlink\" title=\"Main business 
process\"></a>Main business process</h2><p>Here’s the business process 
diagram</p>\n<p><img src=\"/images/Business_Process.png\" alt=\"\"></p>\n<h2 
id=\"Rationale\"><a href=\"#Rationale\" class=\"headerlink\" 
title=\"Rationale\"></a>Rationale</h2><p>The challenge we face at eBay is that 
our data volume is becoming bigger and bigger, systems process become more 
complex, while we do not have a unified data quality solution to ensure the 
trusted data sets which provide confidences on data quality to our data 
consumers.  The key challenges on data quality 
includes:</p>\n<ol>\n<li>Existing commercial data quality solution cannot 
address data quality lineage among systems, cannot scale out to support fast 
growing
  data at eBay</li>\n<li>Existing eBay’s domain specific tools take a long 
time to identify and fix poor data quality when data flowed through multiple 
systems</li>\n<li>Business logic becomes complex, requires data quality system 
much flexible.</li>\n<li>Some data quality issues do have business impact on 
user experiences, revenue, efficiency &amp; compliance.</li>\n<li>Communication 
overhead of data quality metrics, typically in a big organization, which 
involve different teams.</li>\n</ol>\n<p>The idea of Apache Griffin is 
to provide Data Quality validation as a Service, to allow data engineers and 
data consumers to have:</p>\n<ul>\n<li>Near real-time understanding of the data 
quality health of your data pipelines with end-to-end monitoring, all in one 
place.</li>\n<li>Profiling, detecting and correlating issues and providing 
recommendations that drive rapid and focused troubleshooting</li>\n<li>A 
centralized data quality model management system including rule, metadata,
  scheduler etc.  </li>\n<li>Native code generation to run everywhere, 
including Hadoop, Kafka, Spark, etc.</li>\n<li>One set of tools to build data 
quality pipelines across all eBay data 
platforms.</li>\n</ul>\n"}],"PostAsset":[],"PostCategory":[],"PostTag":[],"Tag":[]}}
\ No newline at end of file
+{"meta":{"version":1,"warehouse":"2.2.0"},"models":{"Asset":[],"Cache":[],"Category":[],"Data":[],"Page":[],"Post":[],"PostAsset":[],"PostCategory":[],"PostTag":[],"Tag":[]}}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-griffin-site/blob/b4142cf0/readme.md
----------------------------------------------------------------------
diff --git a/readme.md b/readme.md
index 475f96d..8db43d5 100644
--- a/readme.md
+++ b/readme.md
@@ -1 +1,31 @@
-hello griffin
+# Apache Griffin (incubating) website
+
+This is the website for [Apache Griffin](http://griffin.incubator.apache.org/) (incubating).
+
+## About
+This website is based on Hexo and a default Hexo theme.
+
+## Prerequisite
+1. Node.js
+
+
+## Install & Run
+1. npm install hexo-cli -g
+2. git clone https://github.com/apache/incubator-griffin-site.git
+3. cd incubator-griffin-site
+4. npm install
+5. hexo server
+
+
+## Deploy to asf-site
+1. Checkout branch master
+2. Generate the site to content directory: `hexo generate`
+3. Check the changes and commit.
+4. Push the generated site to the remote `asf-site` branch with `hexo deploy`.
+
+## Questions
+
+### Where to check configuration
+
+Please refer to `_config.yml` for more details, such as the GitHub branch.
+

incubator-griffin-site git commit: how to build this website

Reply via email to