[10/26] drill git commit: social media tutorial

tshiran Sat, 16 May 2015 23:51:40 -0700

social media tutorial


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/c18b098f
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/c18b098f
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/c18b098f

Branch: refs/heads/gh-pages
Commit: c18b098fdf07f77b0b22f574414a7604f50a88b1
Parents: 10e158f
Author: Kristine Hahn <[email protected]>
Authored: Fri May 15 15:01:57 2015 -0700
Committer: Kristine Hahn <[email protected]>
Committed: Fri May 15 15:01:57 2015 -0700

----------------------------------------------------------------------
 _data/docs.json                                 |  71 ++++++-
 .../050-json-data-model.md                      |   2 +-
 _docs/img/socialmed1.png                        | Bin 0 -> 91288 bytes
 _docs/img/socialmed10.png                       | Bin 0 -> 50143 bytes
 _docs/img/socialmed11.png                       | Bin 0 -> 21996 bytes
 _docs/img/socialmed12.png                       | Bin 0 -> 51774 bytes
 _docs/img/socialmed13.png                       | Bin 0 -> 209081 bytes
 _docs/img/socialmed2.png                        | Bin 0 -> 58175 bytes
 _docs/img/socialmed3.png                        | Bin 0 -> 37943 bytes
 _docs/img/socialmed4.png                        | Bin 0 -> 19875 bytes
 _docs/img/socialmed5.png                        | Bin 0 -> 53990 bytes
 _docs/img/socialmed6.png                        | Bin 0 -> 35748 bytes
 _docs/img/socialmed7.png                        | Bin 0 -> 59350 bytes
 _docs/img/socialmed8.png                        | Bin 0 -> 4234 bytes
 _docs/img/socialmed9.png                        | Bin 0 -> 11851 bytes
 .../030-analyzing-the-yelp-academic-dataset.md  |   6 +-
 _docs/tutorials/060-analyzing-social-media.md   | 206 +++++++++++++++++++
 17 files changed, 271 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_data/docs.json
----------------------------------------------------------------------
diff --git a/_data/docs.json b/_data/docs.json
index 28fb349..5f133d5 100644
--- a/_data/docs.json
+++ b/_data/docs.json
@@ -185,8 +185,8 @@
                 }
             ], 
             "children": [], 
-            "next_title": "Install Drill", 
-            "next_url": "/docs/install-drill/", 
+            "next_title": "Analyzing Social Media", 
+            "next_url": "/docs/analyzing-social-media/", 
             "parent": "Tutorials", 
             "previous_title": "Summary", 
             "previous_url": "/docs/summary/", 
@@ -194,6 +194,23 @@
             "title": "Analyzing Highly Dynamic Datasets", 
             "url": "/docs/analyzing-highly-dynamic-datasets/"
         }, 
+        "Analyzing Social Media": {
+            "breadcrumbs": [
+                {
+                    "title": "Tutorials", 
+                    "url": "/docs/tutorials/"
+                }
+            ], 
+            "children": [], 
+            "next_title": "Install Drill", 
+            "next_url": "/docs/install-drill/", 
+            "parent": "Tutorials", 
+            "previous_title": "Analyzing Highly Dynamic Datasets", 
+            "previous_url": "/docs/analyzing-highly-dynamic-datasets/", 
+            "relative_path": "_docs/tutorials/060-analyzing-social-media.md", 
+            "title": "Analyzing Social Media", 
+            "url": "/docs/analyzing-social-media/"
+        }, 
         "Analyzing the Yelp Academic Dataset": {
             "breadcrumbs": [
                 {
@@ -3296,8 +3313,8 @@
             "next_title": "Install Drill Introduction", 
             "next_url": "/docs/install-drill-introduction/", 
             "parent": "", 
-            "previous_title": "Analyzing Highly Dynamic Datasets", 
-            "previous_url": "/docs/analyzing-highly-dynamic-datasets/", 
+            "previous_title": "Analyzing Social Media", 
+            "previous_url": "/docs/analyzing-social-media/", 
             "relative_path": "_docs/040-install-drill.md", 
             "title": "Install Drill", 
             "url": "/docs/install-drill/"
@@ -8083,14 +8100,31 @@
                         }
                     ], 
                     "children": [], 
-                    "next_title": "Install Drill", 
-                    "next_url": "/docs/install-drill/", 
+                    "next_title": "Analyzing Social Media", 
+                    "next_url": "/docs/analyzing-social-media/", 
                     "parent": "Tutorials", 
                     "previous_title": "Summary", 
                     "previous_url": "/docs/summary/", 
                     "relative_path": 
"_docs/tutorials/050-analyzing-highly-dynamic-datasets.md", 
                     "title": "Analyzing Highly Dynamic Datasets", 
                     "url": "/docs/analyzing-highly-dynamic-datasets/"
+                }, 
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Tutorials", 
+                            "url": "/docs/tutorials/"
+                        }
+                    ], 
+                    "children": [], 
+                    "next_title": "Install Drill", 
+                    "next_url": "/docs/install-drill/", 
+                    "parent": "Tutorials", 
+                    "previous_title": "Analyzing Highly Dynamic Datasets", 
+                    "previous_url": 
"/docs/analyzing-highly-dynamic-datasets/", 
+                    "relative_path": 
"_docs/tutorials/060-analyzing-social-media.md", 
+                    "title": "Analyzing Social Media", 
+                    "url": "/docs/analyzing-social-media/"
                 }
             ], 
             "next_title": "Tutorials Introduction", 
@@ -9111,14 +9145,31 @@
                         }
                     ], 
                     "children": [], 
-                    "next_title": "Install Drill", 
-                    "next_url": "/docs/install-drill/", 
+                    "next_title": "Analyzing Social Media", 
+                    "next_url": "/docs/analyzing-social-media/", 
                     "parent": "Tutorials", 
                     "previous_title": "Summary", 
                     "previous_url": "/docs/summary/", 
                     "relative_path": 
"_docs/tutorials/050-analyzing-highly-dynamic-datasets.md", 
                     "title": "Analyzing Highly Dynamic Datasets", 
                     "url": "/docs/analyzing-highly-dynamic-datasets/"
+                }, 
+                {
+                    "breadcrumbs": [
+                        {
+                            "title": "Tutorials", 
+                            "url": "/docs/tutorials/"
+                        }
+                    ], 
+                    "children": [], 
+                    "next_title": "Install Drill", 
+                    "next_url": "/docs/install-drill/", 
+                    "parent": "Tutorials", 
+                    "previous_title": "Analyzing Highly Dynamic Datasets", 
+                    "previous_url": 
"/docs/analyzing-highly-dynamic-datasets/", 
+                    "relative_path": 
"_docs/tutorials/060-analyzing-social-media.md", 
+                    "title": "Analyzing Social Media", 
+                    "url": "/docs/analyzing-social-media/"
                 }
             ], 
             "next_title": "Tutorials Introduction", 
@@ -9358,8 +9409,8 @@
             "next_title": "Install Drill Introduction", 
             "next_url": "/docs/install-drill-introduction/", 
             "parent": "", 
-            "previous_title": "Analyzing Highly Dynamic Datasets", 
-            "previous_url": "/docs/analyzing-highly-dynamic-datasets/", 
+            "previous_title": "Analyzing Social Media", 
+            "previous_url": "/docs/analyzing-social-media/", 
             "relative_path": "_docs/040-install-drill.md", 
             "title": "Install Drill", 
             "url": "/docs/install-drill/"

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/data-sources-and-file-formats/050-json-data-model.md
----------------------------------------------------------------------
diff --git a/_docs/data-sources-and-file-formats/050-json-data-model.md 
b/_docs/data-sources-and-file-formats/050-json-data-model.md
index 0e8b0d3..90b69a1 100644
--- a/_docs/data-sources-and-file-formats/050-json-data-model.md
+++ b/_docs/data-sources-and-file-formats/050-json-data-model.md
@@ -44,7 +44,7 @@ By default, Drill does not support JSON lists of different 
types. For example, J
 * `store.json.all_text_mode`  
   Reads all data from JSON files as VARCHAR. You need to cast numbers from 
VARCHAR to numerical data types, such as DOUBLE or INTEGER.
 
-The default setting of `store.json.all_text_mode` and 
`store.json.read_numbers_as_double` options is false. Using either option 
prevents schema errors, but using `store.json.read_numbers_as_double` has an 
advantage over `store.json.all_text_mode`. Using 
`store.json.read_numbers_as_double` typically involves less explicit casting 
than using `store.json.all_text_mode` because you can often use the numerical 
data as is -\-DOUBLE.
+The default setting of `store.json.all_text_mode` and 
`store.json.read_numbers_as_double` options is false. Using either option 
prevents schema errors, but using `store.json.read_numbers_as_double` has an 
advantage over `store.json.all_text_mode`. Using 
`store.json.read_numbers_as_double` typically involves less explicit casting 
than using `store.json.all_text_mode` because you can often use the numerical 
data as is-\-DOUBLE.
 
 ### Handling Type Differences
 Set the `store.json.read_numbers_as_double` property to true.

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed1.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed1.png b/_docs/img/socialmed1.png
new file mode 100644
index 0000000..86c4776
Binary files /dev/null and b/_docs/img/socialmed1.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed10.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed10.png b/_docs/img/socialmed10.png
new file mode 100644
index 0000000..978c951
Binary files /dev/null and b/_docs/img/socialmed10.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed11.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed11.png b/_docs/img/socialmed11.png
new file mode 100644
index 0000000..140529d
Binary files /dev/null and b/_docs/img/socialmed11.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed12.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed12.png b/_docs/img/socialmed12.png
new file mode 100644
index 0000000..f244838
Binary files /dev/null and b/_docs/img/socialmed12.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed13.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed13.png b/_docs/img/socialmed13.png
new file mode 100644
index 0000000..db7fc6a
Binary files /dev/null and b/_docs/img/socialmed13.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed2.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed2.png b/_docs/img/socialmed2.png
new file mode 100644
index 0000000..b5d78e3
Binary files /dev/null and b/_docs/img/socialmed2.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed3.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed3.png b/_docs/img/socialmed3.png
new file mode 100644
index 0000000..72d651a
Binary files /dev/null and b/_docs/img/socialmed3.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed4.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed4.png b/_docs/img/socialmed4.png
new file mode 100644
index 0000000..52f69bc
Binary files /dev/null and b/_docs/img/socialmed4.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed5.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed5.png b/_docs/img/socialmed5.png
new file mode 100644
index 0000000..40f63d2
Binary files /dev/null and b/_docs/img/socialmed5.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed6.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed6.png b/_docs/img/socialmed6.png
new file mode 100644
index 0000000..7e15565
Binary files /dev/null and b/_docs/img/socialmed6.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed7.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed7.png b/_docs/img/socialmed7.png
new file mode 100644
index 0000000..38f2d89
Binary files /dev/null and b/_docs/img/socialmed7.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed8.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed8.png b/_docs/img/socialmed8.png
new file mode 100644
index 0000000..7cc0600
Binary files /dev/null and b/_docs/img/socialmed8.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/img/socialmed9.png
----------------------------------------------------------------------
diff --git a/_docs/img/socialmed9.png b/_docs/img/socialmed9.png
new file mode 100644
index 0000000..ec55ea7
Binary files /dev/null and b/_docs/img/socialmed9.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/tutorials/030-analyzing-the-yelp-academic-dataset.md
----------------------------------------------------------------------
diff --git a/_docs/tutorials/030-analyzing-the-yelp-academic-dataset.md 
b/_docs/tutorials/030-analyzing-the-yelp-academic-dataset.md
index c822ada..82ab745 100644
--- a/_docs/tutorials/030-analyzing-the-yelp-academic-dataset.md
+++ b/_docs/tutorials/030-analyzing-the-yelp-academic-dataset.md
@@ -106,7 +106,7 @@ analysis extremely easy.
 
     0: jdbc:drill:zk=local> select stars,trunc(avg(review_count)) reviewsavg 
     from 
dfs.`/users/nrentachintala/Downloads/yelp/yelp_academic_dataset_business.json`
-    group by stars order by stars desc;``
+    group by stars order by stars desc;
 
     +------------+------------+
     |   stars    | reviewsavg |
@@ -263,7 +263,7 @@ on data.
 #### Top first categories in number of review counts
 
     0: jdbc:drill:zk=local> select categories[0], count(categories[0]) as 
categorycount 
-    from 
dfs.`/users/nrentachintala/Downloads/yelp_dataset_challenge_academic_dataset/yelp_academic_dataset_business.json`
 
+    from 
dfs.`/users/nrentachintala/Downloads/yelp_academic_dataset_business.json` 
     group by categories[0] 
     order by count(categories[0]) desc limit 10;
     +------------+---------------+
@@ -387,7 +387,7 @@ data so you can apply even deeper SQL functionality. Here 
is a sample query:
 #### Top categories used in business reviews
 
     0: jdbc:drill:zk=local> select celltbl.catl, count(celltbl.catl) 
categorycnt 
-    from (select flatten(categories) catl from 
dfs.`/users/nrentachintala/Downloads/yelp_dataset_challenge_academic_dataset/yelp_academic_dataset_business.json`
 ) celltbl 
+    from (select flatten(categories) catl from 
dfs.`/yelp_academic_dataset_business.json` ) celltbl 
     group by celltbl.catl 
     order by count(celltbl.catl) desc limit 10 ;
     +------------+-------------+

http://git-wip-us.apache.org/repos/asf/drill/blob/c18b098f/_docs/tutorials/060-analyzing-social-media.md
----------------------------------------------------------------------
diff --git a/_docs/tutorials/060-analyzing-social-media.md 
b/_docs/tutorials/060-analyzing-social-media.md
new file mode 100644
index 0000000..80dde9b
--- /dev/null
+++ b/_docs/tutorials/060-analyzing-social-media.md
@@ -0,0 +1,206 @@
+---
+title: "Analyzing Social Media"
+parent: "Tutorials"
+---
+
+This tutorial covers how to analyze Twitter data in native JSON format using 
Apache Drill. First, you configure an environment to stream the Twitter data 
filtered on keywords and languages using Apache Flume, and then you analyze the 
data using Drill. Finally, you run interactive reports and analysis using 
MicroStrategy.
+
+## Social Media Analysis Prerequisites
+
+* Twitter developer account
+* AWS account
+* A MapR node on AWS
+* A MicroStrategy AWS instance
+
+## Configuring the AWS environment
+
+Configuring the environment on Amazon Web Services (AWS) consists of these 
tasks:
+
+* Create a Twitter Dev account and register a Twitter application  
+* Provision a preconfigured AWS MapR node with Flume and Drill  
+* Provision a MicroStrategy AWS instance  
+* Configure MicroStrategy to run reports and analyses using Drill  
+* Create a Twitter Dev account and register an application
+
+This tutorial assumes you are familiar with MicroStrategy. For information 
about using MicroStrategy, see the [MicroStrategy 
documentation](http://www.microstrategy.com/Strategy/media/downloads/products/cloud/cloud_aws-user-guide.pdf).
+
+----------
+
+## Establishing a Twitter Feed and Flume Credentials
+
+The following steps establish a Twitter feed and get Twitter credentials 
required by Flume to set up Twitter as a data source:
+
+1. Go to dev.twitter.com and sign in with your Twitter account details.  
+2. Click **Manage Your Apps** under Tools in the page footer.  
+3. Click **Create New App** and fill in the form, then create the application.
+4. On the **Keys and Access Tokens** tab, create an access token, and then 
click **Create My Access Token**. If you have read-only access, you can create 
the token.
+5. Copy the following credentials for the Twitter App that will be used to 
configure Flume: 
+   * Consumer Key
+   * Consumer Secret
+   * Access Token
+   * Access Token Secret
+
+----------
+
+## Provision Preconfigured MapR Node on AWS
+
+You need to provision a preconfigured MapR node on AWS named ami-4dedc47d. The 
AMI is already configured with Flume, Drill, and specific elements to support 
data streaming from Twitter and Drill query views. The AMI is publicly 
available under Community AMIs, has a 6GB root drive, and a 100GB data drive. 
Because the node is small, very large volumes of data will significantly slow the 
response time of Twitter data queries.
+
+1. In AWS, launch an instance.  
+   The AMI image is preconfigured to use a m2.2xlarge instance type with 4 
vCPUs and 32GB of memory.  
+2. Select the AMI id ami-4dedc47d.  
+3. Make sure that the instance has been assigned an external IP address; an 
Elastic IP is preferred, but not essential.  
+4. Verify that a security group is used with open TCP and UDP ports on the 
node. At this time, all ports are left open on the node.
+5. After provisioning and booting up the instance, reboot the node in the AWS 
EC2 management interface to finalize the configuration.
+
+The node is now configured with the required Flume and Drill installation. 
Next, update the Flume configuration files with the required credentials and 
keywords.
+
+----------
+
+## Update Flume Configuration Files
+
+1. Log in as the ec2-user using the AWS credentials.
+2. Switch to the mapr user on the node using `su - mapr`.
+3. Update the Flume configuration files `flume-env.sh` and `flume.conf` in the 
`<FLUME HOME>/conf` directory using the Twitter app credentials from the first 
section. See the [sample 
files](https://github.com/mapr/mapr-demos/tree/master/drill-twitter-MSTR/flume).
+4. Enter the desired keywords, separated by a comma.  
+   Separate multiple keywords using a space.  
+5. Filter tweets for specific languages, if needed, by entering the ISO 639-1 
[language codes](http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) 
separated by a comma. If you need no language filtering, leave the parameter 
blank.  
+6. Go to the FLUME HOME directory and, as user `mapr`, type `screen` on the 
command line.  
+7. Start Flume by typing the following command:  
+
+        ./bin/flume-ng agent --conf ./conf/ -f ./conf/flume.conf 
-Dflume.root.logger=INFO,console -n TwitterAgent
+8. Enter `CTRL+a` to exit, followed by `d` to detach.  
+   To go back to the screen terminal, simply enter `screen -r` to reattach.  
+   Twitter data streams into the system.  
+9. Run the following command to verify volumes:
+
+         du -h /mapr/drill_demo/twitter/feed
+
+You cannot run queries until data appears in the feed directory. Allow 20-30 
minutes minimum. 
+
+----------
+
+## Provision a MicroStrategy AWS Instance
+
+MicroStrategy provides an AWS instance of various sizes. It comes with a free 
30-day trial for the MicroStrategy instance. AWS charges still apply for the 
platform and OS.
+
+To provision the MicroStrategy node in AWS:
+
+1. On the [MicroStrategy 
website](http://www.microstrategy.com/us/analytics/analytics-on-aws), click 
*Get started*.  
+2. Select some number of users, for example, select 25 users.  
+3. Select the AWS region. Using a MapR node and MicroStrategy instance in the 
same AWS region is highly recommended.
+4. Click **Continue**.  
+5. On the Manual Launch tab, click **Launch with EC2 Console** next to the 
appropriate region, and select **r3.large instance**.  
+   An EC2 instance of r3.large is sufficient for the 25 user version.  
+6. Click **Configure Instance Details**.
+7. Select an appropriate network setting and zones, ideally within the same 
zone and network as the MapR node that you provisioned.
+   {% include startimportant.html %}Make sure that the MicroStrategy instance 
has a Public IP; elastic IP is preferred but not essential.{% include 
endimportant.html %}
+8. Keep the default storage.
+9. Assign a tag to identify the instance.
+10. Select a security group that allows sufficient access to external IPs and 
open all ports because security is not a concern. 
+11. In the AWS Console, launch an instance, and when the AWS reports that the 
instance is running, select it, and click **Connect**.
+12. Click **Get Password** to get the OS Administrator password.
+
+The instance is now accessible with RDP and is using the relevant AWS 
credentials and security.
+
+----------
+
+## Configure MicroStrategy
+
+You need to configure MicroStrategy to integrate with Drill using the ODBC 
driver. You install a MicroStrategy package with a number of useful, prebuilt 
reports for working with Twitter data. You can modify the reports or use the 
reports as a template to create new and more interesting reports and analysis 
models.
+
+1. Configure a System DSN named `Twitter` with the ODBC administrator. The 
quick start version of the MapR ODBC driver requires the DSN.  
+2. [Download the quick start version of the MapR ODBC driver for 
Drill](http://package.mapr.com/tools/MapR-ODBC/MapR_Drill/MapRDrill_odbc_v0.08.1.0618/MapRDrillODBC32.msi).
  
+3. [Configure the ODBC 
driver](http://drill.apache.org/docs/using-microstrategy-analytics-with-apache-drill)
 for Drill on MicroStrategy Analytics.  
+    The Drill object is part of the package and doesn't need to be 
configured.  
+4. Use the AWS Private IP if both the MapR node and the MicroStrategy instance 
are located in the same region (recommended).
+5. Download the [Drill and Twitter 
configuration](https://github.com/mapr/mapr-demos/blob/master/drill-twitter-MSTR/MSTR/DrillTwitterProjectPackage.mmp)
 package for MicroStrategy on the Windows system using Git for Windows or the 
full GitHub for Windows.
+
+----------
+
+## Import Reports
+
+1. In MicroStrategy Developer, select **Schema > Create New Project** to 
create a new project with MicroStrategy Developer.  
+2. Click **Create Project** and type a name for the new project.  
+3. Click **OK**.  
+   The Project appears in MicroStrategy Developer.  
+4. Open MicroStrategy Object Manager.  
+5. Connect to the Project Source and log in as Administrator.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed1.png)
+6. In MicroStrategy Object Manager, MicroStrategy Analytics Modules, select 
the project for the package. For example, select **Twitter analysis Apache 
Drill**.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed2.png)
+7. Select **Tools > Import Configuration Package**.  
+8. Open the configuration package file, and click **Proceed**.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed3.png)
+   The package with the reports is available in MicroStrategy.  
+
+You can test and modify the reports in MicroStrategy Developer. Configure 
permissions if necessary.
+
+----------
+
+## Update the Schema
+
+1. In MicroStrategy Developer, select **Schema > Update Schema**.  
+2. In Schema Update, select all check boxes, and click **Update**.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed4.png)
+
+----------
+
+## Create a User and Set the Password
+
+1. Expand Administration.  
+2. Expand User Manager, and click **Everyone**.  
+3. Right-click to create a new user, or click **Administrator** to edit the 
password.  
+
+----------
+
+## About the Reports
+
+There are 18 reports in the package. Most reports prompt you to specify date 
ranges, output limits, and terms as needed. The package contains reports in 
three main categories:
+
+* Volumes: A number of reports that show the total volume of Tweets by 
different date and time designations.
+* Top List: Displays the top Tweets, Retweets, hashtags, and users.
+* Specific Terms: Tweets and Retweets that can be measured or listed based on 
terms in the text of the Tweet itself.
+
+You can copy and modify the reports or use the reports as a template for 
querying Twitter data using Drill. 
+
+You can access reports through MicroStrategy Developer or the web interface. 
MicroStrategy Developer provides a more powerful interface than the web 
interface to modify reports or add new reports, but requires RDP access to the 
node.
+
+----------
+
+## Using the Web Interface
+
+1. Using a web browser, enter the URL for the web interface:  
+         http://<MSTR node name or IP address>/MicroStrategy/asp/Main.aspx
+2. Log in as the User you created or as Administrator, using the credentials 
created initially with Developer.  
+3. On the Welcome MicroStrategy Web User page, choose the project that was 
used to load the analysis package: **Drill Twitter Analysis**.  
+   ![choose project]({{ site.baseurl }}/docs/img/socialmed5.png)
+4. Select **Shared Reports**.  
+   The folders with the three main categories of the reports appear.
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed6.png)
+5. Select a report, and respond to any prompts. For example, to run the Top 
Tweet Languages by Date Range, enter the required Date_Start and Date_End.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed7.png)
+6. Click **Run Report**.  
+   A histogram report appears showing the top tweet languages by date range.
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed8.png)
+7. To refresh the data or re-enter prompt values, select **Data > Refresh** or 
**Data > Re-prompt**.
+
+## Browsing the Apache Drill Twitter Analysis Reports
+
+The MicroStrategy Developer reports are located in the Public Objects folder 
of the project you chose for installing the package.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed9.png)
+Many of the reports require you to respond to prompts to select the desired 
data. For example, select the Top Hashtags report in the right-hand column. 
This report requires you to respond to prompts for a Start Date and End Date to 
specify the date range for data of interest; by default, data for the last two 
months, ending with the current date, is selected. You can also specify the 
limit for the number of Top Hashtags to be returned; the default is the top 10 
hashtags.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed10.png)
+When you click **Finish**, a bar chart report appears showing each hashtag and 
the number of times it appeared in the specified date range.  
+   ![project sources]({{ site.baseurl }}/docs/img/socialmed11.png)
+
+Other reports are available in the bundle. For example, this report shows 
total tweets by hour:
+   ![tweets by hour]({{ site.baseurl }}/docs/img/socialmed12.png)
+This report shows top Retweets for a date range with original Tweet date and 
count in the date range.  
+   ![retweets report]({{ site.baseurl }}/docs/img/socialmed13.png)
+
+----------
+
+## Summary
+
+In this tutorial, you learned how to configure an environment to stream 
Twitter data using Apache Flume. You then learned how to analyze the data in 
native JSON format with SQL using Apache Drill, and how to run interactive 
reports and analysis using MicroStrategy.
\ No newline at end of file

[10/26] drill git commit: social media tutorial

Reply via email to