http://git-wip-us.apache.org/repos/asf/drill-site/blob/c4de0f83/docs/analyzing-highly-dynamic-datasets/index.html
----------------------------------------------------------------------
diff --git a/docs/analyzing-highly-dynamic-datasets/index.html
b/docs/analyzing-highly-dynamic-datasets/index.html
new file mode 100644
index 0000000..092fc4b
--- /dev/null
+++ b/docs/analyzing-highly-dynamic-datasets/index.html
@@ -0,0 +1,1004 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+
+<meta charset="UTF-8">
+<meta name=viewport content="width=device-width, initial-scale=1">
+
+
+<title>Analyzing Highly Dynamic Datasets - Apache Drill</title>
+
+<link href="/css/syntax.css" rel="stylesheet" type="text/css">
+<link href="/css/style.css" rel="stylesheet" type="text/css">
+<link href="/css/arrows.css" rel="stylesheet" type="text/css">
+<link href="/css/breadcrumbs.css" rel="stylesheet" type="text/css">
+<link href="/css/code.css" rel="stylesheet" type="text/css">
+<link rel="stylesheet"
href="//maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css">
+<link href="/css/responsive.css" rel="stylesheet" type="text/css">
+
+<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon">
+<link rel="icon" href="/favicon.ico" type="image/x-icon">
+
+<script language="javascript" type="text/javascript"
src="/js/lib/jquery-1.11.1.min.js"></script>
+<script language="javascript" type="text/javascript"
src="/js/lib/jquery.easing.1.3.js"></script>
+<script language="javascript" type="text/javascript"
src="/js/modernizr.custom.js"></script>
+<script language="javascript" type="text/javascript"
src="/js/script.js"></script>
+<script language="javascript" type="text/javascript"
src="/js/drill.js"></script>
+
+
+</head>
+
+<body onResize="resized();">
+ <div class="page-wrap">
+ <div class="bui"></div>
+
+<div id="menu" class="mw">
+<ul>
+ <li class='toc-categories'>
+ <a class="expand-toc-icon" href="javascript:void(0);"><i class="fa
fa-bars"></i></a>
+ </li>
+ <li class="logo"><a href="/"></a></li>
+ <li class='expand-menu'>
+ <a href="javascript:void(0);"><span class='menu-text'>Menu</span><span
class='expand-icon'><i class="fa fa-bars"></i></span></a>
+ </li>
+ <li class='clear-float'></li>
+ <li class="documentation-menu">
+ <a href="/docs/">Documentation</a>
+ <ul>
+
+ <li><a href="/docs/getting-started/">Getting Started</a></li>
+
+ <li><a href="/docs/architecture/">Architecture</a></li>
+
+ <li><a href="/docs/tutorials/">Tutorials</a></li>
+
+ <li><a href="/docs/install-drill/">Install Drill</a></li>
+
+ <li><a href="/docs/connect-a-data-source/">Connect a Data
Source</a></li>
+
+ <li><a href="/docs/odbc-jdbc-interfaces/">ODBC/JDBC Interfaces</a></li>
+
+ <li><a href="/docs/query-data/">Query Data</a></li>
+
+ <li><a href="/docs/sql-reference/">SQL Reference</a></li>
+
+ <li><a href="/docs/data-sources-and-file-formats/">Data Sources and
File Formats</a></li>
+
+ <li><a href="/docs/develop-custom-functions/">Develop Custom
Functions</a></li>
+
+ <li><a href="/docs/manage-drill/">Manage Drill</a></li>
+
+ <li><a href="/docs/developer-information/">Developer
Information</a></li>
+
+ <li><a href="/docs/release-notes/">Release Notes</a></li>
+
+ <li><a href="/docs/sample-datasets/">Sample Datasets</a></li>
+
+ <li><a href="/docs/archived-pages/">Archived Pages</a></li>
+
+ <li><a href="/docs/progress-reports/">Progress Reports</a></li>
+
+ <li><a href="/docs/project-bylaws/">Project Bylaws</a></li>
+
+ </ul>
+ </li>
+ <li class='nav'>
+ <a href="/community-resources/">Community</a>
+ <ul>
+ <li><a href="/team/">Team</a></li>
+ <li><a href="/mailinglists/">Mailing Lists</a></li>
+ <li><a href="/community-resources/">Community Resources</a></li>
+ </ul>
+ </li>
+ <li class='nav'><a href="/faq/">FAQ</a></li>
+ <li class='nav'><a href="/blog/">Blog</a></li>
+ <li id="twitter-menu-item"><a href="https://twitter.com/apachedrill"
title="apachedrill on twitter" target="_blank"><img
src="/images/twitter_32_26_white.png" alt="twitter logo" align="center"></a>
</li>
+ <li class='search-bar'>
+ <form id="drill-search-form">
+ <input type="text" placeholder="Search Apache Drill"
id="drill-search-term" />
+ <button type="submit">
+ <i class="fa fa-search"></i>
+ </button>
+ </form>
+ </li>
+ <li class="d">
+ <a href="/download/">
+ <i class="fa fa-cloud-download"></i> Download
+ </a>
+ </li>
+</ul>
+</div>
+
+
+
+
+
+
+
+<aside class="sidebar">
+ <div class="docsidebar">
+ <div class="docsidebarwrapper">
+ <ul style="display: block;">
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Getting
Started</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/drill-introduction/">Drill Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/why-drill/">Why Drill</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript:
void(0);">Architecture</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/architecture-introduction/">Architecture Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/core-modules/">Core Modules</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Architectural Highlights</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/flexibility/">Flexibility</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/performance/">Performance</a></li>
+
+ </ul>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1 current_section "><a href="javascript:
void(0);">Tutorials</a></li>
+ <ul class="current_section">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/tutorials-introduction/">Tutorials Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/drill-in-10-minutes/">Drill in 10 Minutes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/analyzing-the-yelp-academic-dataset/">Analyzing the Yelp Academic
Dataset</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Learn
Drill with the MapR Sandbox</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/about-the-mapr-sandbox/">About the MapR Sandbox</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/installing-the-apache-drill-sandbox/">Installing the Apache Drill
Sandbox</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/getting-to-know-the-drill-sandbox/">Getting to Know the Drill
Sandbox</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/lession-1-learn-about-the-data-set/">Lesson 1: Learn about the
Data Set</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/lession-2-run-queries-with-ansi-sql/">Lesson 2: Run Queries with
ANSI SQL</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/lession-3-run-queries-on-complex-data-types/">Lesson 3: Run
Queries on Complex Data Types</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/summary/">Summary</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2 current"><a class="reference internal"
href="/docs/analyzing-highly-dynamic-datasets/">Analyzing Highly Dynamic
Datasets</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Install
Drill</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/install-drill-introduction/">Install Drill Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/deploying-drill-in-a-cluster/">Deploying Drill in a Cluster</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Installing
Drill in Embedded Mode</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/embedded-mode-prerequisites/">Embedded Mode Prerequisites</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/installing-drill-on-linux/">Installing Drill on Linux</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/installing-drill-on-mac-os-x/">Installing Drill on Mac OS X</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/installing-drill-on-windows/">Installing Drill on Windows</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/installing-drill-in-distributed-mode/">Installing Drill in
Distributed Mode</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Connect a Data
Source</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/connect-a-data-source-introduction/">Connect a Data Source
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/storage-plugin-registration/">Storage Plugin Registration</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Storage
Plugin Configuration</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/plugin-configuration-introduction/">Plugin Configuration
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/workspaces/">Workspaces</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/file-system-storage-plugin/">File System Storage Plugin</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/hbase-storage-plugin/">HBase Storage Plugin</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/hive-storage-plugin/">Hive Storage Plugin</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/drill-default-input-format/">Drill Default Input Format</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/mongodb-plugin-for-apache-drill/">MongoDB Plugin for Apache
Drill</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/mapr-db-format/">MapR-DB Format</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">ODBC/JDBC
Interfaces</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/interfaces-introduction/">Interfaces Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/using-jdbc/">Using JDBC</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Using ODBC
on Linux and Mac OS X</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/odbc-on-linux-and-mac-introduction/">ODBC on Linux and Mac
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/installing-the-driver-on-linux/">Installing the Driver on
Linux</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/installing-the-driver-on-mac-os-x/">Installing the Driver on Mac OS
X</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/configuring-connections-on-linux-and-mac-os-x/">Configuring
Connections on Linux and Mac OS X</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/driver-configuration-options/">Driver Configuration Options</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/using-a-connection-string/">Using a Connection String</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/advanced-properties/">Advanced Properties</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/testing-the-odbc-connection/">Testing the ODBC Connection</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Using ODBC
on Windows</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/installing-the-driver-on-windows/">Installing the Driver on
Windows</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/configuring-connections-on-windows/">Configuring Connections on
Windows</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/connecting-to-odbc-data-sources/">Connecting to ODBC Data
Sources</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/tableau-examples/">Tableau Examples</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/using-drill-explorer-on-windows/">Using Drill Explorer on
Windows</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/using-microstrategy-analytics-with-drill/">Using MicroStrategy
Analytics with Drill</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Query
Data</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/query-data-introduction/">Query Data Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Querying a
File System</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/querying-a-file-system-introduction/">Querying a File System
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/querying-json-files/">Querying JSON Files</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/querying-parquet-files/">Querying Parquet Files</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/querying-plain-text-files/">Querying Plain Text Files</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/querying-directories/">Querying Directories</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/querying-hbase/">Querying HBase</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Querying
Complex Data</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/querying-complex-data-introduction/">Querying Complex Data
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/sample-data-donuts/">Sample Data: Donuts</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/selecting-flat-data/">Selecting Flat Data</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/using-sql-functions-clauses-and-joins/">Using SQL Functions,
Clauses, and Joins</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/selecting-nested-data-for-a-column/">Selecting Nested Data for a
Column</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/selecting-multiple-columns-within-nested-data/">Selecting Multiple
Columns Within Nested Data</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/querying-hive/">Querying Hive</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/querying-the-information-schema/">Querying the INFORMATION
SCHEMA</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/querying-system-tables/">Querying System Tables</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">SQL
Reference</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/sql-reference-introduction/">SQL Reference Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Data
Types</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/supported-data-types/">Supported Data Types</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/date-time-and-timestamp/">Date, Time, and Timestamp</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/handling-different-data-types/">Handling Different Data
Types</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/lexical-structure/">Lexical Structure</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/operators/">Operators</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">SQL
Functions</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/about-sql-function-examples/">About SQL Function Examples</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/math-and-trig/">Math and Trig</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/data-type-conversion/">Data Type Conversion</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/date-time-functions-and-arithmetic/">Date/Time Functions and
Arithmetic</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/string-manipulation/">String Manipulation</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/aggregate-and-aggregate-statistical/">Aggregate and Aggregate
Statistical</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/functions-for-handling-nulls/">Functions for Handling Nulls</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Nested
Data Functions</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/nested-data-limitations/">Nested Data Limitations</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/flatten/">FLATTEN</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/kvgen/">KVGEN</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/repeated-count/">REPEATED_COUNT</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/repeated-contains/">REPEATED_CONTAINS</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/query-directory-functions/">Query Directory Functions</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">SQL
Commands</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/supported-sql-commands/">Supported SQL Commands</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/alter-session-command/">ALTER SESSION Command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/alter-system-command/">ALTER SYSTEM Command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/create-table-as-ctas-command/">CREATE TABLE AS (CTAS)
command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/create-view-command/">CREATE VIEW command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/describe-command/">DESCRIBE Command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/explain-commands/">EXPLAIN commands</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/select-statements/">SELECT Statements</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/show-databases-and-show-schemas-command/">SHOW DATABASES AND SHOW
SCHEMAS Command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/show-files-command/">SHOW FILES Command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/show-tables-command/">SHOW TABLES Command</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/use-command/">USE Command</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">SQL
Conditional Expressions</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/case/">CASE</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/reserved-keywords/">Reserved Keywords</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/sql-extensions/">SQL Extensions</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Data Sources
and File Formats</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/data-sources-and-file-formats-introduction/">Data Sources and File
Formats Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/hive-to-drill-data-type-mapping/">Hive-to-Drill Data Type
Mapping</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/deploying-and-using-a-hive-udf/">Deploying and Using a Hive
UDF</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/parquet-format/">Parquet Format</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/json-data-model/">JSON Data Model</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Develop Custom
Functions</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/develop-custom-functions-introduction/">Develop Custom Functions
Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/develop-a-simple-function/">Develop a Simple Function</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/developing-an-aggregate-function/">Developing an Aggregate
Function</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/adding-custom-functions-to-drill/">Adding Custom Functions to
Drill</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/using-custom-functions-in-queries/">Using Custom Functions in
Queries</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/custom-function-interfaces/">Custom Function Interfaces</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Manage
Drill</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/manage-drill-introduction/">Manage Drill Introduction</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/configuring-drill-in-a-dedicated-cluster/">Configuring Drill in a
Dedicated Cluster</a></li>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Configuring a Multitenant Cluster</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/configuring-a-multitenant-cluster-introduction/">Configuring a
Multitenant Cluster Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/configuring-multitenant-resources/">Configuring Multitenant
Resources</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/configuring-resources-for-a-shared-drillbit/">Configuring Resources
for a Shared Drillbit</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript:
void(0);">Configuration Options</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/configuration-options-introduction/">Configuration Options
Introduction</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/start-up-options/">Start-Up Options</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/planning-and-execution-options/">Planning and Execution
Options</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/persistent-configuration-storage/">Persistent Configuration
Storage</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/starting-stopping-drill/">Starting/Stopping Drill</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/ports-used-by-drill/">Ports Used by Drill</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/partition-pruning/">Partition Pruning</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/monitoring-and-canceling-queries-in-the-drill-web-ui/">Monitoring
and Canceling Queries in the Drill Web UI</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Developer
Information</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Develop
Drill</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/compiling-drill-from-source/">Compiling Drill from Source</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/drill-patch-review-tool/">Drill Patch Review Tool</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Contribute
to Drill</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/apache-drill-contribution-guidelines/">Apache Drill Contribution
Guidelines</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/apache-drill-contribution-ideas/">Apache Drill Contribution
Ideas</a></li>
+
+ </ul>
+
+
+
+ <li class="toctree-l2"><a href="javascript: void(0);">Design
Docs</a></li>
+ <ul style="display: none">
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/drill-plan-syntax/">Drill Plan Syntax</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/rpc-overview/">RPC Overview</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/query-stages/">Query Stages</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/useful-research/">Useful Research</a></li>
+
+ <li class="toctree-l3"><a class="reference internal"
href="/docs/value-vectors/">Value Vectors</a></li>
+
+ </ul>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Release
Notes</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/apache-drill-0-5-0-release-notes/">Apache Drill 0.5.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/apache-drill-0-4-0-release-notes/">Apache Drill 0.4.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/apache-drill-m1-release-notes-apache-drill-alpha/">Apache Drill M1
Release Notes (Apache Drill Alpha)</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/apache-drill-m1-release-notes-apache-drill-alpha/">Apache Drill M1
Release Notes (Apache Drill Alpha)</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/apache-drill-0-6-0-release-notes/">Apache Drill 0.6.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/apache-drill-0-7-0-release-notes/">Apache Drill 0.7.0 Release
Notes</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/apache-drill-0-8-0-release-notes/">Apache Drill 0.8.0 Release
Notes</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Sample
Datasets</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/aol-search/">AOL Search</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/enron-emails/">Enron Emails</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/wikipedia-edit-history/">Wikipedia Edit History</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Archived
Pages</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/how-to-run-the-drill-demo/">How to Run the Drill Demo</a></li>
+
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/what-is-apache-drill/">What is Apache Drill</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a href="javascript: void(0);">Progress
Reports</a></li>
+ <ul style="display: none">
+
+
+ <li class="toctree-l2"><a class="reference internal"
href="/docs/2014-q1-drill-report/">2014 Q1 Drill Report</a></li>
+
+
+ </ul>
+
+
+
+ <li class="toctree-l1"><a class="reference internal"
href="/docs/project-bylaws/">Project Bylaws</a></li>
+
+
+ </ul>
+
+ </div>
+ </div>
+</aside>
+
+
+ <nav class="breadcrumbs">
+ <li><a href="/docs/">Docs</a></li>
+
+
+ <li><a href="/docs/tutorials/">Tutorials</a></li>
+
+ <li>Analyzing Highly Dynamic Datasets</li>
+</nav>
+
+ <div class="main-content-wrapper">
+ <div class="main-content">
+
+
+ <a class="edit-link"
href="https://github.com/apache/drill/blob/gh-pages/_docs/tutorials/050-analyzing-highly-dynamic-datasets.md"
target="_blank"><i class="fa fa-pencil-square-o"></i></a>
+
+
+ <div class="int_title">
+ <h1>Analyzing Highly Dynamic Datasets</h1>
+
+ </div>
+
+ <link href="/css/docpage.css" rel="stylesheet" type="text/css">
+
+ <div class="int_text" align="left">
+
+ <p>Today’s data is dynamic and application-driven. The growth of a
new era of business applications driven by industry trends such as web, social,
mobile, and Internet of Things is generating datasets with new data types and
new data models. These applications are iterative, and the associated data
models typically are semi-structured, schema-less and constantly evolving.
Semi-structured data models can be complex/nested, schema-less, and capable of
having varying fields in every single row and of constantly evolving as fields
get added and removed frequently to meet business requirements. </p>
+
+<p>This tutorial shows you how to natively query dynamic datasets, such as
JSON, and derive insights from any type of data in minutes. The dataset used in
the example is from the Yelp check-ins dataset, which has the following
structure:</p>
+<div class="highlight"><pre><code class="language-text"
data-lang="text">check-in
+{
+ 'type': 'checkin',
+ 'business_id': (encrypted business id),
+ 'checkin_info': {
+ '0-0': (number of checkins from 00:00 to 01:00 on all Sundays),
+ '1-0': (number of checkins from 01:00 to 02:00 on all Sundays),
+ ...
+ '14-4': (number of checkins from 14:00 to 15:00 on all
Thursdays),
+ ...
+ '23-6': (number of checkins from 23:00 to 00:00 on all
Saturdays)
+ }, # if there was no checkin for a hour-day block it will not be in the
dataset
+}
+</code></pre></div>
+<p>It is worth repeating the comment at the bottom of this snippet:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text"> # if
there was no checkin for a hour-day block it will not be in the dataset.
+</code></pre></div>
+<p>The element names that you see in the <code>checkin_info</code> are unknown
upfront and can vary for every row. The data, although simple, is highly
dynamic. To analyze the data, there is no need to first represent this
dataset in a flattened relational structure, as you would using any other SQL
on Hadoop technology.</p>
+
+<hr>
+
+<p>Step 1: First download Drill, if you have not yet done so, onto your
machine</p>
+<div class="highlight"><pre><code class="language-text"
data-lang="text">http://drill.apache.org/download/
+tar -xvf apache-drill-0.8.0.tar
+</code></pre></div>
+<p>Install Drill locally on your desktop (embedded mode). You don’t need
Hadoop.</p>
+
+<hr>
+
+<p>Step 2: Launch the SQLLine, a pre-packaged JDBC-based application with
Drill</p>
+<div class="highlight"><pre><code class="language-text"
data-lang="text">bin/sqlline -u jdbc:drill:zk=local
+</code></pre></div>
+<hr>
+
+<p>Step 3: Start analyzing the data using SQL</p>
+
+<p>First, let’s take a look at the dataset:</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT * FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
limit 2;
++--------------+------------+-------------+
+| checkin_info | type | business_id |
++--------------+------------+-------------+
+|
{"3-4":1,"13-5":1,"6-6":1,"14-5":1,"14-6":1,"14-2":1,"14-3":1,"19-0":1,"11-5":1,"13-2":1,"11-6":2,"11-3":1,"12-6":1,"6-5":1,"5-5":1,"9-2":1,"9-5":1,"9-6":1,"5-2":1,"7-6":1,"7-5":1,"7-4":1,"17-5":1,"8-5":1,"10-2":1,"10-5":1,"10-6":1}
| checkin | JwUE5GmEO-sH1FuwJgKBlQ |
+|
{"6-6":2,"6-5":1,"7-6":1,"7-5":1,"8-5":2,"10-5":1,"9-3":1,"12-5":1,"15-3":1,"15-5":1,"15-6":1,"16-3":1,"10-0":1,"15-4":1,"10-4":1,"8-2":1}
| checkin | uGykseHzyS5xAMWoN6YUqA |
++--------------+------------+-------------+
+</code></pre></div>
+<p>You query the data in JSON files directly. Schema definitions in a Hive store
are not necessary. The names of the elements within the
<code>checkin_info</code> column are different between the first and second
row.</p>
+
+<p>Drill provides a function called KVGEN (Key Value Generator) which is
useful when working with complex data that contain arbitrary maps consisting of
dynamic and unknown element names such as checkin_info. KVGEN turns the dynamic
map into an array of key-value pairs where keys represent the dynamic element
names.</p>
+
+<p>Let's apply KVGEN on the <code>checkin_info</code> element to generate
key-value pairs.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT KVGEN(checkin_info) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
LIMIT 2;
++------------+
+| checkins |
++------------+
+|
[{"key":"3-4","value":1},{"key":"13-5","value":1},{"key":"6-6","value":1},{"key":"14-5","value":1},{"key":"14-6","value":1},{"key":"14-2","value":1},{"key":"14-3","value":1},{"key":"19-0","value":1},{"key":"11-5","value":1},{"key":"13-2","value":1},{"key":"11-6","value":2},{"key":"11-3","value":1},{"key":"12-6","value":1},{"key":"6-5","value":1},{"key":"5-5","value":1},{"key":"9-2","value":1},{"key":"9-5","value":1},{"key":"9-6","value":1},
{"key":"5-2","value":1},{"key":"7-6","value":1},{"key":"7-5","value":1},{"key":"7-4","value":1},{"key":"17-5","value":1},{"key":"8-5","value":1},{"key":"10-2","value":1},{"key":"10-5","value":1},{"key":"10-6","value":1}]
|
+|
[{"key":"6-6","value":2},{"key":"6-5","value":1},{"key":"7-6","value":1},{"key":"7-5","value":1},{"key":"8-5","value":2},{"key":"10-5","value":1},{"key":"9-3","value":1},{"key":"12-5","value":1},{"key":"15-3","value":1},{"key":"15-5","value":1},{"key":"15-6","value":1},{"key":"16-3","value":1},{"key":"10-0","value":1},{"key":"15-4","value":1},{"key":"10-4","value":1},{"key":"8-2","value":1}]
|
++------------+
+</code></pre></div>
+<p>Drill provides another function for operating on complex data, called
FLATTEN, which breaks the list of key-value pairs produced by KVGEN
into separate rows so that you can apply further analytic functions to them.</p>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
LIMIT 20;
++------------+
+| checkins |
++------------+
+| {"key":"3-4","value":1} |
+| {"key":"13-5","value":1} |
+| {"key":"6-6","value":1} |
+| {"key":"14-5","value":1} |
+| {"key":"14-6","value":1} |
+| {"key":"14-2","value":1} |
+| {"key":"14-3","value":1} |
+| {"key":"19-0","value":1} |
+| {"key":"11-5","value":1} |
+| {"key":"13-2","value":1} |
+| {"key":"11-6","value":2} |
+| {"key":"11-3","value":1} |
+| {"key":"12-6","value":1} |
+| {"key":"6-5","value":1} |
+| {"key":"5-5","value":1} |
+| {"key":"9-2","value":1} |
+| {"key":"9-5","value":1} |
+| {"key":"9-6","value":1} |
+| {"key":"5-2","value":1} |
+| {"key":"7-6","value":1} |
++------------+
+</code></pre></div>
+<p>You can get value from the data quickly by applying both KVGEN and FLATTEN
functions on the datasets on the fly--no need for time-consuming schema
definitions and data storage in intermediate formats.</p>
+
+<p>On the output of flattened data, you can use standard SQL functionality such as
filters, aggregates, and sorts. Let's see a few examples.</p>
+
+<h2 id="get-the-total-number-of-check-ins-recorded-in-the-yelp-dataset">Get
the total number of check-ins recorded in the Yelp dataset</h2>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT SUM(checkintbl.checkins.`value`) AS
TotalCheckins FROM (
+. . . . . . . . . . . > SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json` )
checkintbl
+. . . . . . . . . . . > ;
++---------------+
+| TotalCheckins |
++---------------+
+| 4713811 |
++---------------+
+</code></pre></div>
+<h2 id="get-the-number-of-check-ins-specifically-for-sunday-midnights">Get the
number of check-ins specifically for Sunday midnights</h2>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT SUM(checkintbl.checkins.`value`) AS
SundayMidnightCheckins FROM (
+. . . . . . . . . . . > SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json` )
checkintbl WHERE checkintbl.checkins.key='23-0';
++------------------------+
+| SundayMidnightCheckins |
++------------------------+
+| 8575 |
++------------------------+
+</code></pre></div>
+<h2 id="get-the-number-of-check-ins-per-day-of-the-week">Get the number of
check-ins per day of the week</h2>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT `right`(checkintbl.checkins.key,1)
WeekDay,sum(checkintbl.checkins.`value`) TotalCheckins from (
+. . . . . . . . . . . > select flatten(kvgen(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json`
) checkintbl GROUP BY `right`(checkintbl.checkins.key,1) ORDER BY TotalCheckins;
++------------+---------------+
+| WeekDay | TotalCheckins |
++------------+---------------+
+| 1 | 545626 |
+| 0 | 555038 |
+| 2 | 555747 |
+| 3 | 596296 |
+| 6 | 735830 |
+| 4 | 788073 |
+| 5 | 937201 |
++------------+---------------+
+</code></pre></div>
+<h2 id="get-the-number-of-check-ins-per-hour-of-the-day">Get the number of
check-ins per hour of the day</h2>
+<div class="highlight"><pre><code class="language-text" data-lang="text">0:
jdbc:drill:zk=local> SELECT
SUBSTR(checkintbl.checkins.key,1,strpos(checkintbl.checkins.key,'-')-1)
AS HourOfTheDay ,SUM(checkintbl.checkins.`value`) TotalCheckins FROM (
+. . . . . . . . . . . > SELECT FLATTEN(KVGEN(checkin_info)) checkins FROM
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_checkin.json` )
checkintbl GROUP BY
SUBSTR(checkintbl.checkins.key,1,strpos(checkintbl.checkins.key,'-')-1)
ORDER BY TotalCheckins;
++--------------+---------------+
+| HourOfTheDay | TotalCheckins |
++--------------+---------------+
+| 3 | 20357 |
+| 4 | 21076 |
+| 2 | 28116 |
+| 5 | 33842 |
+| 1 | 45467 |
+| 6 | 54174 |
+| 0 | 74127 |
+| 7 | 96329 |
+| 23 | 102009 |
+| 8 | 130091 |
+| 22 | 140338 |
+| 9 | 162913 |
+| 21 | 211949 |
+| 10 | 220687 |
+| 15 | 261384 |
+| 14 | 276188 |
+| 16 | 292547 |
+| 20 | 293783 |
+| 13 | 328373 |
+| 11 | 338675 |
+| 17 | 374186 |
+| 19 | 385381 |
+| 12 | 399797 |
+| 18 | 422022 |
++--------------+---------------+
+</code></pre></div>
+<hr>
+
+<h2 id="summary">Summary</h2>
+
+<p>In this tutorial, you explored both structured and semi-structured data without
any upfront schema management or ETL.</p>
+
+
+
+ <div class="doc-nav">
+
+ <span class="previous-toc"><a href="/docs/summary/">←
Summary</a></span><span class="next-toc"><a href="/docs/install-drill/">Install
Drill →</a></span>
+</div>
+
+
+ </div>
+ </div>
+ </div>
+
+ </div>
+ <p class="push"></p>
+<div id="footer" class="mw">
+<div class="wrapper">
+Copyright © 2012-2014 The Apache Software Foundation, licensed under the
Apache License, Version 2.0.<br>
+Apache and the Apache feather logo are trademarks of The Apache Software
Foundation. Other names appearing on the site may be trademarks of their
respective owners.<br/><br/>
+</div>
+</div>
+
+ <script>
+(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
+(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
+m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
+})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
+
+ga('create', 'UA-53379651-1', 'auto');
+ga('send', 'pageview');
+</script>
+
+</body>
+</html>