http://git-wip-us.apache.org/repos/asf/drill-site/blob/bbdefcb2/_site/docs/analyzing-highly-dynamic-datasets/index.html
----------------------------------------------------------------------
diff --git a/_site/docs/analyzing-highly-dynamic-datasets/index.html
b/_site/docs/analyzing-highly-dynamic-datasets/index.html
new file mode 100644
index 0000000..46da85c
--- /dev/null
+++ b/_site/docs/analyzing-highly-dynamic-datasets/index.html
@@ -0,0 +1,1233 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+<!-- Document metadata: character encoding comes first, then viewport and robots directives. -->
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<meta name="robots" content="noindex">
+
+<title>Analyzing Highly Dynamic Datasets - Apache Drill</title>
+<!-- Stylesheets: CDN-hosted Font Awesome and PT Sans (fetched over HTTPS explicitly), then the site stylesheet. -->
+<link href="https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css" rel="stylesheet" type="text/css">
+<link href="https://fonts.googleapis.com/css?family=PT+Sans" rel="stylesheet" type="text/css">
+<link href="/drill/css/site.css" rel="stylesheet" type="text/css">
+
+<link rel="shortcut icon" href="/drill/favicon.ico" type="image/x-icon">
+<link rel="icon" href="/drill/favicon.ico" type="image/x-icon">
+<!-- Scripts stay synchronous and in their original order: jQuery loads before the jquery.easing plugin and the site scripts. -->
+<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js" type="text/javascript"></script>
+<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery-easing/1.3/jquery.easing.min.js" type="text/javascript"></script>
+<script src="/drill/js/modernizr.custom.js" type="text/javascript"></script>
+<script src="/drill/js/script.js" type="text/javascript"></script>
+<script src="/drill/js/drill.js" type="text/javascript"></script>
+
+</head>
+
+
+<body onResize="resized();">
+ <div class="page-wrap">
+ <div class="bui"></div>
+
+<div id="menu" class="mw">
+<ul>
+ <li class="toc-categories">
+ <a class="expand-toc-icon" href="javascript:void(0);" aria-label="Toggle table of contents"><i class="fa fa-bars" aria-hidden="true"></i></a>
+ </li>
+ <li class="logo"><a href="/drill/" aria-label="Apache Drill home"></a></li>
+ <li class="expand-menu">
+ <a href="javascript:void(0);"><span class="menu-text">Menu</span><span class="expand-icon"><i class="fa fa-bars" aria-hidden="true"></i></span></a>
+ </li>
+ <li class="clear-float"></li>
+ <li class="documentation-menu">
+ <a href="/drill/docs/">Documentation</a>
+ <ul>
+ <!-- Documentation submenu: one link per top-level docs section. -->
+ <li><a href="/drill/docs/getting-started/">Getting Started</a></li>
+
+ <li><a href="/drill/docs/architecture/">Architecture</a></li>
+
+ <li><a href="/drill/docs/tutorials/">Tutorials</a></li>
+
+ <li><a href="/drill/docs/install-drill/">Install Drill</a></li>
+
+ <li><a href="/drill/docs/configure-drill/">Configure Drill</a></li>
+
+ <li><a href="/drill/docs/connect-a-data-source/">Connect a Data Source</a></li>
+
+ <li><a href="/drill/docs/odbc-jdbc-interfaces/">ODBC/JDBC Interfaces</a></li>
+
+ <li><a href="/drill/docs/query-data/">Query Data</a></li>
+
+ <li><a href="/drill/docs/performance-tuning/">Performance Tuning</a></li>
+
+ <li><a href="/drill/docs/log-and-debug/">Log and Debug</a></li>
+
+ <li><a href="/drill/docs/sql-reference/">SQL Reference</a></li>
+
+ <li><a href="/drill/docs/data-sources-and-file-formats/">Data Sources and File Formats</a></li>
+
+ <li><a href="/drill/docs/develop-custom-functions/">Develop Custom Functions</a></li>
+
+ <li><a href="/drill/docs/troubleshooting/">Troubleshooting</a></li>
+
+ <li><a href="/drill/docs/developer-information/">Developer Information</a></li>
+
+ <li><a href="/drill/docs/release-notes/">Release Notes</a></li>
+
+ <li><a href="/drill/docs/sample-datasets/">Sample Datasets</a></li>
+
+ <li><a href="/drill/docs/project-bylaws/">Project Bylaws</a></li>
+
+ </ul>
+ </li>
+ <li class="nav">
+ <a href="/drill/community-resources/">Community</a>
+ <ul>
+ <li><a href="/drill/team/">Team</a></li>
+ <li><a href="/drill/mailinglists/">Mailing Lists</a></li>
+ <li><a href="/drill/community-resources/">Community Resources</a></li>
+ </ul>
+ </li>
+ <li class="nav"><a href="/drill/faq/">FAQ</a></li>
+ <li class="nav"><a href="/drill/blog/">Blog</a></li>
+ <li id="twitter-menu-item"><a href="https://twitter.com/apachedrill" title="apachedrill on twitter" target="_blank" rel="noopener noreferrer"><img src="/drill/images/twitter_32_26_white.png" alt="twitter logo" align="center"></a> </li>
+ <li class="search-bar">
+ <form id="drill-search-form">
+ <input type="text" placeholder="Search Apache Drill" id="drill-search-term" aria-label="Search Apache Drill" />
+ <button type="submit" aria-label="Search">
+ <i class="fa fa-search" aria-hidden="true"></i>
+ </button>
+ </form>
+ </li>
+ <li class="d">
+ <a href="/drill/download/">
+ <i class="fa fa-cloud-download" aria-hidden="true"></i> Download
+ </a>
+ </li>
+</ul>
+</div>
+
+ <link href="/drill/css/content.css" rel="stylesheet" type="text/css">
+
+
+
+
+
+
+
+<aside class="sidebar">
+ <div class="docsidebar">
+ <div class="docsidebarwrapper">
+ <ul style="display: block;">
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Getting
Started</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/drill-introduction/">Drill Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/why-drill/">Why Drill</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript:
void(0);">Architecture</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/architecture-introduction/">Architecture Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/drill-query-execution/">Drill Query Execution</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/core-modules/">Core Modules</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/performance/">Performance</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1 current_section "><a href="javascript:
void(0);">Tutorials</a></li>
+ <ul class="current_section">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/tutorials-introduction/">Tutorials Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/drill-in-10-minutes/">Drill in 10 Minutes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/analyzing-the-yelp-academic-dataset/">Analyzing the Yelp
Academic Dataset</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Learn
Drill with the MapR Sandbox</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/about-the-mapr-sandbox/">About the MapR Sandbox</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-the-apache-drill-sandbox/">Installing the Apache
Drill Sandbox</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/getting-to-know-the-drill-sandbox/">Getting to Know the Drill
Sandbox</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/lesson-1-learn-about-the-data-set/">Lesson 1: Learn about the
Data Set</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/lesson-2-run-queries-with-ansi-sql/">Lesson 2: Run Queries
with ANSI SQL</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/lesson-3-run-queries-on-complex-data-types/">Lesson 3: Run
Queries on Complex Data Types</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/summary/">Summary</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2 current"><a class="reference internal" href="/drill/docs/analyzing-highly-dynamic-datasets/" aria-current="page">Analyzing Highly Dynamic Datasets</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/analyzing-social-media/">Analyzing Social Media</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/analyzing-data-using-window-functions/">Analyzing Data Using
Window Functions</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Install
Drill</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/install-drill-introduction/">Install Drill
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Installing
Drill in Embedded Mode</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/embedded-mode-prerequisites/">Embedded Mode
Prerequisites</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-drill-on-linux-and-mac-os-x/">Installing Drill on
Linux and Mac OS X</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/starting-drill-on-linux-and-mac-os-x/">Starting Drill on
Linux and Mac OS X</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-drill-on-windows/">Installing Drill on
Windows</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/starting-drill-on-windows/">Starting Drill on Windows</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Installing
Drill in Distributed Mode</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/distributed-mode-prerequisites/">Distributed Mode
Prerequisites</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-drill-on-the-cluster/">Installing Drill on the
Cluster</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/starting-drill-in-distributed-mode/">Starting Drill in
Distributed Mode</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/starting-the-web-console/">Starting the Web Console</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Configure
Drill</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/configure-drill-introduction/">Configure Drill
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/configuring-drill-memory/">Configuring Drill Memory</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Configuring a Multitenant Cluster</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-a-multitenant-cluster-introduction/">Configuring
a Multitenant Cluster Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-multitenant-resources/">Configuring Multitenant
Resources</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-resources-for-a-shared-drillbit/">Configuring
Resources for a Shared Drillbit</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/configuring-user-impersonation/">Configuring User
Impersonation</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/configuring-user-authentication/">Configuring User
Authentication</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/configuring-user-impersonation-with-hive-authorization/">Configuring
User Impersonation with Hive Authorization</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/configuring-web-console-and-rest-api-security/">Configuring
Web Console and REST API Security</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Configuration Options</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuration-options-introduction/">Configuration Options
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/start-up-options/">Start-Up Options</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/planning-and-execution-options/">Planning and Execution
Options</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/persistent-configuration-storage/">Persistent Configuration
Storage</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/ports-used-by-drill/">Ports Used by Drill</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/configuring-the-drill-shell/">Configuring the Drill
Shell</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Connect a Data
Source</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/connect-a-data-source-introduction/">Connect a Data Source
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/storage-plugin-registration/">Storage Plugin
Registration</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Storage
Plugin Configuration</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/plugin-configuration-basics/">Plugin Configuration
Basics</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/workspaces/">Workspaces</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/drill-default-input-format/">Drill Default Input
Format</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/file-system-storage-plugin/">File System Storage
Plugin</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/hbase-storage-plugin/">HBase Storage Plugin</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/hive-storage-plugin/">Hive Storage Plugin</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/rdbms-storage-plugin/">RDBMS Storage Plugin</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/mongodb-storage-plugin/">MongoDB Storage Plugin</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/mapr-db-format/">MapR-DB Format</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/s3-storage-plugin/">S3 Storage Plugin</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">ODBC/JDBC
Interfaces</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/interfaces-introduction/">Interfaces Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/using-the-jdbc-driver/">Using the JDBC Driver</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/using-jdbc-with-squirrel-on-windows/">Using JDBC with
SQuirreL on Windows</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Installing
the ODBC Driver</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-the-driver-on-linux/">Installing the Driver on
Linux</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-the-driver-on-mac-os-x/">Installing the Driver on
Mac OS X</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-the-driver-on-windows/">Installing the Driver on
Windows</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/installing-the-tdc-file-on-windows/">Installing the TDC File
on Windows</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Configuring ODBC</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/odbc-configuration-reference/">ODBC Configuration
Reference</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-odbc-on-linux/">Configuring ODBC on Linux</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-odbc-on-mac-os-x/">Configuring ODBC on Mac OS
X</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-odbc-on-windows/">Configuring ODBC on
Windows</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/testing-the-odbc-connection/">Testing the ODBC
Connection</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Using
Drill Explorer</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/drill-explorer-introduction/">Drill Explorer
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/connecting-drill-explorer-to-data/">Connecting Drill Explorer
to Data</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/browsing-data-and-defining-views/">Browsing Data and Defining
Views</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Using
Drill with BI Tools</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/using-drill-with-bi-tools-introduction/">Using Drill with BI
Tools Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/tableau-examples/">Tableau Examples</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/using-microstrategy-analytics-with-apache-drill/">Using
MicroStrategy Analytics with Apache Drill</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/using-tibco-spotfire-desktop-with-drill/">Using Tibco
Spotfire Desktop with Drill</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-tibco-spotfire-server-with-drill/">Configuring
Tibco Spotfire Server with Drill</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/using-qlik-sense-with-drill/">Using Qlik Sense with
Drill</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/using-apache-drill-with-tableau-9-desktop/">Using Apache
Drill with Tableau 9 Desktop</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/using-apache-drill-with-tableau-9-server/">Using Apache Drill
with Tableau 9 Server</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/configuring-jreport-with-drill/">Configuring JReport with
Drill</a></li>
+
+ </ul>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Query
Data</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/query-data-introduction/">Query Data Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Querying a
File System</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/querying-a-file-system-introduction/">Querying a File System
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/querying-json-files/">Querying JSON Files</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/querying-parquet-files/">Querying Parquet Files</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/querying-plain-text-files/">Querying Plain Text Files</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/querying-directories/">Querying Directories</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/querying-sequence-files/">Querying Sequence Files</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/querying-hbase/">Querying HBase</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Querying
Complex Data</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/querying-complex-data-introduction/">Querying Complex Data
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/sample-data-donuts/">Sample Data: Donuts</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/selecting-flat-data/">Selecting Flat Data</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/using-sql-functions-clauses-and-joins/">Using SQL Functions,
Clauses, and Joins</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/selecting-nested-data-for-a-column/">Selecting Nested Data
for a Column</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/selecting-multiple-columns-within-nested-data/">Selecting
Multiple Columns Within Nested Data</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/querying-hive/">Querying Hive</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/querying-the-information-schema/">Querying the INFORMATION
SCHEMA</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/querying-system-tables/">Querying System Tables</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/monitoring-and-canceling-queries-in-the-drill-web-console/">Monitoring
and Canceling Queries in the Drill Web Console</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Performance
Tuning</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/performance-tuning-introduction/">Performance Tuning
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/partition-pruning/">Partition Pruning</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/optimizing-parquet-metadata-reading/">Optimizing Parquet
Metadata Reading</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/choosing-a-storage-format/">Choosing a Storage Format</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Query
Plans and Tuning</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/query-plans-and-tuning-introduction/">Query Plans and Tuning
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/join-planning-guidelines/">Join Planning Guidelines</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/guidelines-for-optimizing-aggregation/">Guidelines for
Optimizing Aggregation</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/modifying-query-planning-options/">Modifying Query Planning
Options</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/sort-based-and-hash-based-memory-constrained-operators/">Sort-Based
and Hash-Based Memory-Constrained Operators</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/enabling-query-queuing/">Enabling Query Queuing</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/controlling-parallelization-to-balance-performance-with-multi-tenancy/">Controlling
Parallelization to Balance Performance with Multi-Tenancy</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Identifying Performance Issues</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/query-plans/">Query Plans</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/query-profiles/">Query Profiles</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Performance Tuning Reference</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/query-profile-column-descriptions/">Query Profile Column
Descriptions</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/physical-operators/">Physical Operators</a></li>
+
+ </ul>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Log and
Debug</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/log-and-debug-introduction/">Log and Debug
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/error-messages/">Error Messages</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/modify-logback-xml/">Modify logback.xml</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/review-the-java-stack-trace/">Review the Java Stack
Trace</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/query-audit-logging/">Query Audit Logging</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">SQL
Reference</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/sql-reference-introduction/">SQL Reference
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Data
Types</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/supported-data-types/">Supported Data Types</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/date-time-and-timestamp/">Date, Time, and Timestamp</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/handling-different-data-types/">Handling Different Data
Types</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/lexical-structure/">Lexical Structure</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/operators/">Operators</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">SQL
Functions</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/about-sql-function-examples/">About SQL Function
Examples</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/math-and-trig/">Math and Trig</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/data-type-conversion/">Data Type Conversion</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/date-time-functions-and-arithmetic/">Date/Time Functions and
Arithmetic</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/string-manipulation/">String Manipulation</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/aggregate-and-aggregate-statistical/">Aggregate and Aggregate
Statistical</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/functions-for-handling-nulls/">Functions for Handling
Nulls</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">SQL Window
Functions</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/sql-window-functions-introduction/">SQL Window Functions
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/aggregate-window-functions/">Aggregate Window
Functions</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/ranking-window-functions/">Ranking Window Functions</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/value-window-functions/">Value Window Functions</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/sql-window-functions-examples/">SQL Window Functions
Examples</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Nested
Data Functions</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/nested-data-limitations/">Nested Data Limitations</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/flatten/">FLATTEN</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/kvgen/">KVGEN</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/repeated-count/">REPEATED_COUNT</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/repeated-contains/">REPEATED_CONTAINS</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/query-directory-functions/">Query Directory Functions</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">SQL
Commands</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/supported-sql-commands/">Supported SQL Commands</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/alter-session/">ALTER SESSION</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/alter-system/">ALTER SYSTEM</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/create-table-as-ctas/">CREATE TABLE AS (CTAS)</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/partition-by-clause/">PARTITION BY Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/create-view/">CREATE VIEW</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/describe/">DESCRIBE</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/drop-table/">DROP TABLE</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/drop-view/">DROP VIEW</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/explain/">EXPLAIN</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/select/">SELECT</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/select-list/">SELECT List</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/from-clause/">FROM Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/group-by-clause/">GROUP BY Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/having-clause/">HAVING Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/limit-clause/">LIMIT Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/offset-clause/">OFFSET Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/order-by-clause/">ORDER BY Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/union-set-operator/">UNION Set Operator</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/where-clause/">WHERE Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/with-clause/">WITH Clause</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/show-databases-and-show-schemas/">SHOW DATABASES and SHOW
SCHEMAS</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/show-files/">SHOW FILES</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/show-tables/">SHOW TABLES</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/use/">USE</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">SQL
Conditional Expressions</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/case/">CASE</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/reserved-keywords/">Reserved Keywords</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/sql-extensions/">SQL Extensions</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Data Sources
and File Formats</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/data-sources-and-file-formats-introduction/">Data Sources and
File Formats Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/hive-to-drill-data-type-mapping/">Hive-to-Drill Data Type
Mapping</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/deploying-and-using-a-hive-udf/">Deploying and Using a Hive
UDF</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/parquet-format/">Parquet Format</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/json-data-model/">JSON Data Model</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/text-files-csv-tsv-psv/">Text Files: CSV, TSV, PSV</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/sequence-files/">Sequence Files</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Develop Custom
Functions</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/develop-custom-functions-introduction/">Develop Custom
Functions Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/developing-a-simple-function/">Developing a Simple
Function</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/tutorial-develop-a-simple-function/">Tutorial: Develop a
Simple Function</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/developing-an-aggregate-function/">Developing an Aggregate
Function</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/adding-custom-functions-to-drill/">Adding Custom Functions to
Drill</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/using-custom-functions-in-queries/">Using Custom Functions in
Queries</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/custom-function-interfaces/">Custom Function
Interfaces</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a class="reference internal"
href="/drill/docs/troubleshooting/">Troubleshooting</a></li>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Developer
Information</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/rest-api/">REST API</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Develop
Drill</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/compiling-drill-from-source/">Compiling Drill from
Source</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/drill-patch-review-tool/">Drill Patch Review Tool</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Contribute
to Drill</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/apache-drill-contribution-guidelines/">Apache Drill
Contribution Guidelines</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/apache-drill-contribution-ideas/">Apache Drill Contribution
Ideas</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Design
Docs</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/drill-plan-syntax/">Drill Plan Syntax</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/rpc-overview/">RPC Overview</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/query-stages/">Query Stages</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/useful-research/">Useful Research</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/drill/docs/value-vectors/">Value Vectors</a></li>
+
+ </ul>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Release
Notes</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-1-3-0-release-notes/">Apache Drill 1.3.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-1-2-0-release-notes/">Apache Drill 1.2.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-1-1-0-release-notes/">Apache Drill 1.1.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-1-0-0-release-notes/">Apache Drill 1.0.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-0-9-0-release-notes/">Apache Drill 0.9.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-0-8-0-release-notes/">Apache Drill 0.8.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-0-7-0-release-notes/">Apache Drill 0.7.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-0-6-0-release-notes/">Apache Drill 0.6.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-m1-release-notes-apache-drill-alpha/">Apache
Drill M1 Release Notes (Apache Drill Alpha)</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-m1-release-notes-apache-drill-alpha/">Apache
Drill M1 Release Notes (Apache Drill Alpha)</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-0-5-0-release-notes/">Apache Drill 0.5.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/apache-drill-0-4-0-release-notes/">Apache Drill 0.4.0 Release
Notes</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Sample
Datasets</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/aol-search/">AOL Search</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/enron-emails/">Enron Emails</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/drill/docs/wikipedia-edit-history/">Wikipedia Edit History</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a class="reference internal"
href="/drill/docs/project-bylaws/">Project Bylaws</a></li>
+
+
+ </ul>
+
+ </div>
+ </div>
+</aside>
+
+
+<nav class="breadcrumbs">
+ <li><a href="/drill/docs/">Docs</a></li>
+
+
+ <li><a href="/drill/docs/tutorials/">Tutorials</a></li>
+
+ <li>Analyzing Highly Dynamic Datasets</li>
+</nav>
+
+<div class="main-content-wrapper">
+ <div class="main-content">
+
+
+ <a class="edit-link"
href="https://github.com/apache/drill/blob/gh-pages/_docs/tutorials/050-analyzing-highly-dynamic-datasets.md"
target="_blank"><i class="fa fa-pencil-square-o"></i></a>
+
+
+ <div class="int_title left">
+ <h1>Analyzing Highly Dynamic Datasets</h1>
+
+ </div>
+
+ <link href="/drill/css/docpage.css" rel="stylesheet" type="text/css">
+
+ <div class="int_text" align="left">
+
+ <p>Today’s data is dynamic and application-driven. The growth of a
new era of business applications driven by industry trends such as web, social,
mobile, and Internet of Things is generating datasets with new data types and
new data models. These applications are iterative, and the associated data
models typically are semi-structured, schema-less and constantly evolving.
Semi-structured data models can be complex/nested, schema-less, and capable of
having varying fields in every single row and of constantly evolving as fields
get added and removed frequently to meet business requirements. </p>
+
+<p>This tutorial shows you how to natively query dynamic datasets, such as
JSON, and derive insights from any type of data in minutes. The dataset used in
the example is from the Yelp check-ins dataset, which has the following
structure:</p>
+<div class="highlight"><pre><code class="language-text"
data-lang="text">check-in
+{
+ 'type': 'checkin',
+ 'business_id': (encrypted business id),
+ 'checkin_info': {
+ '0-0': (number of checkins from 00:00 to 01:00 on all Sundays),
+ '1-0': (number of checkins from 01:00 to 02:00 on all Sundays),
+ ...
+ '14-4': (number of checkins from 14:00 to 15:00 on all
Thursdays),
+ ...
+ '23-6': (number of checkins from 23:00 to 00:00 on all
Saturdays)
+ }, # if there was no checkin for a hour-day block it will not be in the
dataset
+}
+</code></pre></div>
+<p>It is worth repeating the comment at the bottom of this snippet:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"> # if
there was no checkin for a hour-day block it will not be in the dataset.
+</code></pre></div>
+<p>The element names that you see in the <code>checkin_info</code> are unknown
upfront and can vary for every row. The data, although simple, is highly
dynamic data. To analyze the data there is no need to first represent this
dataset in a flattened relational structure, as you would using any other SQL
on Hadoop technology.</p>
+
+<hr>
+
+<p>Step 1: First download Drill, if you have not yet done so, onto your
machine</p>
+<div class="highlight"><pre><code class="language-text"
data-lang="text">http://drill.apache.org/download/
+tar -xvf apache-drill-0.9.0.tar
+</code></pre></div>
+<p>Install Drill locally on your desktop (embedded mode). You don’t need
Hadoop.</p>
+
+<hr>
+
+<p>Step 2: Start the Drill shell.</p>
+<div class="highlight"><pre><code class="language-text"
data-lang="text">bin/drill-embedded
+</code></pre></div>
+<hr>
+
+<p>Step 3: Start analyzing the data using SQL</p>
+
+<p>First, let’s take a look at the dataset:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT * FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
limit 2;
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+------------------------+
+| checkin_info
|
type | business_id |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+------------------------+
+|
{"3-4":1,"13-5":1,"6-6":1,"14-5":1,"14-6":1,"14-2":1,"14-3":1,"19-0":1,"11-5":1,"13-2":1,"11-6":2,"11-3":1,"12-6":1,"6-5":1,"5-5":1,"9-2":1,"9-5":1,"9-6":1,"5-2":1,"7-6":1,"7-5":1,"7-4":1,"17-5":1,"8-5":1,"10-2":1,"10-5":1,"10-6":1}
| checkin | JwUE5GmEO-sH1FuwJgKBlQ |
+|
{"6-6":2,"6-5":1,"7-6":1,"7-5":1,"8-5":2,"10-5":1,"9-3":1,"12-5":1,"15-3":1,"15-5":1,"15-6":1,"16-3":1,"10-0":1,"15-4":1,"10-4":1,"8-2":1}
| checkin | uGykseHzyS5xAMWoN6YUqA |
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+------------------------+
+</code></pre></div>
+<div class="admonition note">
+ <p class="first admonition-title">Note</p>
+ <p class="last">This document aligns Drill output for example purposes.
Drill output is not aligned in this case. </p>
+</div>
+
+<p>You query the data in JSON files directly. Schema definitions in the Hive
metastore are not necessary. The names of the elements within the
<code>checkin_info</code> column are different between the first and second
row.</p>
+
+<p>Drill provides a function called KVGEN (Key Value Generator) which is
useful when working with complex data that contains arbitrary maps consisting
of dynamic and unknown element names such as checkin_info. KVGEN turns the
dynamic map into an array of key-value pairs where keys represent the dynamic
element names.</p>
+
+<p>Let’s apply KVGEN on the <code>checkin_info</code> element to generate
key-value pairs.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT KVGEN(checkin_info) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
LIMIT 2;
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| checkins
|
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|
[{"key":"3-4","value":1},{"key":"13-5","value":1},{"key":"6-6","value":1},{"key":"14-5","value":1},{"key":"14-6","value":1},{"key":"14-2","value":1},{"key":"14-3","value":1},{"key":"19-0","value":1},{"key":"11-5","value":1},{"key":"13-2","value":1},{"key":"11-6","value":2},{"key":"11-3","value":1},{"key":"12-6","value":1},{"key":"6-5","value":1},{"key":"5-5","value":1},{"key":"9-2","value":1},{"key":"9-5","value":1},{"key":"9-6","value":1},{"key"
:"5-2","value":1},{"key":"7-6","value":1},{"key":"7-5","value":1},{"key":"7-4","value":1},{"key":"17-5","value":1},{"key":"8-5","value":1},{"key":"10-2","value":1},{"key":"10-5","value":1},{"key":"10-6","value":1}]
|
+|
[{"key":"6-6","value":2},{"key":"6-5","value":1},{"key":"7-6","value":1},{"key":"7-5","value":1},{"key":"8-5","value":2},{"key":"10-5","value":1},{"key":"9-3","value":1},{"key":"12-5","value":1},{"key":"15-3","value":1},{"key":"15-5","value":1},{"key":"15-6","value":1},{"key":"16-3","value":1},{"key":"10-0","value":1},{"key":"15-4","value":1},{"key":"10-4","value":1},{"key":"8-2","value":1}]
|
++------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+</code></pre></div>
+<p>Drill provides another function to operate on complex data called
“Flatten” to break the list of key-value pairs resulting from “KVGen”
into separate rows to further apply analytic functions on it.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
LIMIT 20;
++--------------------------+
+| checkins |
++--------------------------+
+| {"key":"3-4","value":1} |
+| {"key":"13-5","value":1} |
+| {"key":"6-6","value":1} |
+| {"key":"14-5","value":1} |
+| {"key":"14-6","value":1} |
+| {"key":"14-2","value":1} |
+| {"key":"14-3","value":1} |
+| {"key":"19-0","value":1} |
+| {"key":"11-5","value":1} |
+| {"key":"13-2","value":1} |
+| {"key":"11-6","value":2} |
+| {"key":"11-3","value":1} |
+| {"key":"12-6","value":1} |
+| {"key":"6-5","value":1} |
+| {"key":"5-5","value":1} |
+| {"key":"9-2","value":1} |
+| {"key":"9-5","value":1} |
+| {"key":"9-6","value":1} |
+| {"key":"5-2","value":1} |
+| {"key":"7-6","value":1} |
++--------------------------+
+</code></pre></div>
+<p>You can get value from the data quickly by applying both KVGEN and FLATTEN
functions on the datasets on the fly--no need for time-consuming schema
definitions and data storage in intermediate formats.</p>
+
+<p>On the output of flattened data, you use standard SQL functionality such as
filters, aggregates, and sort. Let’s see a few examples.</p>
+
+<h3 id="get-the-total-number-of-check-ins-recorded-in-the-yelp-dataset">Get
the total number of check-ins recorded in the Yelp dataset</h3>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT SUM(checkintbl.checkins.`value`) AS
TotalCheckins FROM (
+. . . . . . . . . . . > SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json` )
checkintbl
+. . . . . . . . . . . > ;
++---------------+
+| TotalCheckins |
++---------------+
+| 4713811 |
++---------------+
+</code></pre></div>
+<h3 id="get-the-number-of-check-ins-specifically-for-sunday-midnights">Get the
number of check-ins specifically for Sunday midnights</h3>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT SUM(checkintbl.checkins.`value`) AS
SundayMidnightCheckins FROM (
+. . . . . . . . . . . > SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json` )
checkintbl WHERE checkintbl.checkins.key='23-0';
++------------------------+
+| SundayMidnightCheckins |
++------------------------+
+| 8575 |
++------------------------+
+</code></pre></div>
+<h3 id="get-the-number-of-check-ins-per-day-of-the-week">Get the number of
check-ins per day of the week</h3>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT `right`(checkintbl.checkins.key,1)
WeekDay,sum(checkintbl.checkins.`value`) TotalCheckins from (
+. . . . . . . . . . . > select flatten(kvgen(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
) checkintbl GROUP BY `right`(checkintbl.checkins.key,1) ORDER BY TotalCheckins;
++------------+---------------+
+| WeekDay | TotalCheckins |
++------------+---------------+
+| 1 | 545626 |
+| 0 | 555038 |
+| 2 | 555747 |
+| 3 | 596296 |
+| 6 | 735830 |
+| 4 | 788073 |
+| 5 | 937201 |
++------------+---------------+
+</code></pre></div>
+<h3 id="get-the-number-of-check-ins-per-hour-of-the-day">Get the number of
check-ins per hour of the day</h3>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT
SUBSTR(checkintbl.checkins.key,1,strpos(checkintbl.checkins.key,'-')-1)
AS HourOfTheDay ,SUM(checkintbl.checkins.`value`) TotalCheckins FROM (
+. . . . . . . . . . . > SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json` )
checkintbl GROUP BY
SUBSTR(checkintbl.checkins.key,1,strpos(checkintbl.checkins.key,'-')-1)
ORDER BY TotalCheckins;
++--------------+---------------+
+| HourOfTheDay | TotalCheckins |
++--------------+---------------+
+| 3 | 20357 |
+| 4 | 21076 |
+| 2 | 28116 |
+| 5 | 33842 |
+| 1 | 45467 |
+| 6 | 54174 |
+| 0 | 74127 |
+| 7 | 96329 |
+| 23 | 102009 |
+| 8 | 130091 |
+| 22 | 140338 |
+| 9 | 162913 |
+| 21 | 211949 |
+| 10 | 220687 |
+| 15 | 261384 |
+| 14 | 276188 |
+| 16 | 292547 |
+| 20 | 293783 |
+| 13 | 328373 |
+| 11 | 338675 |
+| 17 | 374186 |
+| 19 | 385381 |
+| 12 | 399797 |
+| 18 | 422022 |
++--------------+---------------+
+</code></pre></div>
+<hr>
+
+<h2 id="summary">Summary</h2>
+
+<p>In this tutorial, you surf both structured and semi-structured data without
any upfront schema management or ETL.</p>
+
+
+
+ <div class="doc-nav">
+
+ <span class="previous-toc"><a href="/drill/docs/summary/">←
Summary</a></span><span class="next-toc"><a
href="/drill/docs/analyzing-social-media/">Analyzing Social Media →</a></span>
+</div>
+
+
+ </div>
+ </div>
+</div>
+
+ </div>
+ <p class="push"></p>
+<div id="footer" class="mw">
+<div class="wrapper">
+Copyright © 2012-2014 The Apache Software Foundation, licensed under the
Apache License, Version 2.0.<br>
+Apache and the Apache feather logo are trademarks of The Apache Software
Foundation. Other names appearing on the site may be trademarks of their
respective owners.<br/><br/>
+</div>
+</div>
+
+ <script>
+(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+ga('create', 'UA-53379651-1', 'auto');
+ga('send', 'pageview');
+</script>
+<script type="text/javascript"
src="//s7.addthis.com/js/300/addthis_widget.js#pubid=ra-548b2caa33765e8d"
async="async"></script>
+</body>
+</html>