This is an automated email from the ASF dual-hosted git repository.
dzamo pushed a commit to branch gh-pages
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/gh-pages by this push:
new 89c596f Document Splunk storage plugin.
89c596f is described below
commit 89c596f72b2d0742ca052021aa0365bb9cf7b1c3
Author: James Turton <[email protected]>
AuthorDate: Fri Jun 18 15:39:54 2021 +0200
Document Splunk storage plugin.
---
_data/docs.json | 71 ++++++--
.../plugins/140-splunk-storage-plugin.md | 195 +++++++++++++++++++++
2 files changed, 256 insertions(+), 10 deletions(-)
diff --git a/_data/docs.json b/_data/docs.json
index e5deb22..e70c1ea 100644
--- a/_data/docs.json
+++ b/_data/docs.json
@@ -3084,14 +3084,31 @@
}
],
"children": [],
- "next_title": "ODBC/JDBC Interfaces",
- "next_url": "/docs/odbc-jdbc-interfaces/",
+ "next_title": "Splunk Plugin",
+ "next_url": "/docs/splunk-plugin/",
"parent": "Connect a Data Source",
"previous_title": "HTTP Storage Plugin",
"previous_url": "/docs/http-storage-plugin/",
"relative_path":
"_docs/connect-a-data-source/plugins/130-elasticsearch-plugin.md",
"title": "ElasticSearch Plugin",
"url": "/docs/elasticsearch-plugin/"
+ },
+ {
+ "breadcrumbs": [
+ {
+ "title": "Connect a Data Source",
+ "url": "/docs/connect-a-data-source/"
+ }
+ ],
+ "children": [],
+ "next_title": "ODBC/JDBC Interfaces",
+ "next_url": "/docs/odbc-jdbc-interfaces/",
+ "parent": "Connect a Data Source",
+ "previous_title": "ElasticSearch Plugin",
+ "previous_url": "/docs/elasticsearch-plugin/",
+ "relative_path":
"_docs/connect-a-data-source/plugins/140-splunk-storage-plugin.md",
+ "title": "Splunk Plugin",
+ "url": "/docs/splunk-plugin/"
}
],
"next_title": "Connect a Data Source Introduction",
@@ -5311,8 +5328,8 @@
}
],
"children": [],
- "next_title": "ODBC/JDBC Interfaces",
- "next_url": "/docs/odbc-jdbc-interfaces/",
+ "next_title": "Splunk Plugin",
+ "next_url": "/docs/splunk-plugin/",
"parent": "Connect a Data Source",
"previous_title": "HTTP Storage Plugin",
"previous_url": "/docs/http-storage-plugin/",
@@ -8300,8 +8317,8 @@
"next_title": "Interfaces Introduction",
"next_url": "/docs/interfaces-introduction/",
"parent": "",
- "previous_title": "ElasticSearch Plugin",
- "previous_url": "/docs/elasticsearch-plugin/",
+ "previous_title": "Splunk Plugin",
+ "previous_url": "/docs/splunk-plugin/",
"relative_path": "_docs/060-odbc-jdbc-interfaces.md",
"title": "ODBC/JDBC Interfaces",
"url": "/docs/odbc-jdbc-interfaces/"
@@ -15430,6 +15447,23 @@
"title": "Sort-Based and Hash-Based Memory-Constrained Operators",
"url":
"/docs/sort-based-and-hash-based-memory-constrained-operators/"
},
+ "Splunk Plugin": {
+ "breadcrumbs": [
+ {
+ "title": "Connect a Data Source",
+ "url": "/docs/connect-a-data-source/"
+ }
+ ],
+ "children": [],
+ "next_title": "ODBC/JDBC Interfaces",
+ "next_url": "/docs/odbc-jdbc-interfaces/",
+ "parent": "Connect a Data Source",
+ "previous_title": "ElasticSearch Plugin",
+ "previous_url": "/docs/elasticsearch-plugin/",
+ "relative_path":
"_docs/connect-a-data-source/plugins/140-splunk-storage-plugin.md",
+ "title": "Splunk Plugin",
+ "url": "/docs/splunk-plugin/"
+ },
"Start-Up Options": {
"breadcrumbs": [
{
@@ -19143,14 +19177,31 @@
}
],
"children": [],
- "next_title": "ODBC/JDBC Interfaces",
- "next_url": "/docs/odbc-jdbc-interfaces/",
+ "next_title": "Splunk Plugin",
+ "next_url": "/docs/splunk-plugin/",
"parent": "Connect a Data Source",
"previous_title": "HTTP Storage Plugin",
"previous_url": "/docs/http-storage-plugin/",
"relative_path":
"_docs/connect-a-data-source/plugins/130-elasticsearch-plugin.md",
"title": "ElasticSearch Plugin",
"url": "/docs/elasticsearch-plugin/"
+ },
+ {
+ "breadcrumbs": [
+ {
+ "title": "Connect a Data Source",
+ "url": "/docs/connect-a-data-source/"
+ }
+ ],
+ "children": [],
+ "next_title": "ODBC/JDBC Interfaces",
+ "next_url": "/docs/odbc-jdbc-interfaces/",
+ "parent": "Connect a Data Source",
+ "previous_title": "ElasticSearch Plugin",
+ "previous_url": "/docs/elasticsearch-plugin/",
+ "relative_path":
"_docs/connect-a-data-source/plugins/140-splunk-storage-plugin.md",
+ "title": "Splunk Plugin",
+ "url": "/docs/splunk-plugin/"
}
],
"next_title": "Connect a Data Source Introduction",
@@ -19775,8 +19826,8 @@
"next_title": "Interfaces Introduction",
"next_url": "/docs/interfaces-introduction/",
"parent": "",
- "previous_title": "ElasticSearch Plugin",
- "previous_url": "/docs/elasticsearch-plugin/",
+ "previous_title": "Splunk Plugin",
+ "previous_url": "/docs/splunk-plugin/",
"relative_path": "_docs/060-odbc-jdbc-interfaces.md",
"title": "ODBC/JDBC Interfaces",
"url": "/docs/odbc-jdbc-interfaces/"
diff --git a/_docs/connect-a-data-source/plugins/140-splunk-storage-plugin.md
b/_docs/connect-a-data-source/plugins/140-splunk-storage-plugin.md
new file mode 100644
index 0000000..ed2cd20
--- /dev/null
+++ b/_docs/connect-a-data-source/plugins/140-splunk-storage-plugin.md
@@ -0,0 +1,195 @@
+---
+title: "Splunk Plugin"
+slug: "Splunk Plugin"
+parent: "Connect a Data Source"
+---
+
+**Introduced in release:** 1.19
+
+Drill's Splunk storage plugin allows you to execute SQL queries against Splunk
+indexes. This storage plugin implementation is based on
+[Apache Calcite adapter for
Splunk](https://calcite.apache.org/javadocAggregate/org/apache/calcite/adapter/splunk/package-summary.html).
+
+## Configuration
+
+To connect Drill to Splunk, create a new storage plugin with a configuration
+containing the following properties.
+
+```json
+{
+ "type":"splunk",
+ "username": "admin",
+ "password": "changeme",
+ "hostname": "localhost",
+ "port": 8089,
+ "earliestTime": "-14d",
+ "latestTime": "now",
+ "enabled": false
+}
+```
+
+### Configuration Options
+
+| Option | Default | Description |
+| ------------ | ------- | ------------------------------------------------- |
+| type | (none) | Set to "splunk" to use this plugin |
+| username | null | Splunk username to be used by Drill |
+| password | null | Splunk password to be used by Drill |
+| hostname | null | Splunk host to be queried by Drill |
+| port | null | TCP port over which Drill will connect to Splunk. |
+| earliestTime | null | Global earliest record timestamp default |
+| latestTime | null | Global latest record timestamp default |
+
+## Understanding Splunk's Data Model
+
+Splunk's primary use case is analyzing event logs with a timestamp. As such,
+data is indexed by the timestamp, with the most recent data being indexed
first.
+By default, Splunk will sort the data in reverse chronological order. Large
+Splunk installations will put older data into buckets of hot, warm and cold
+storage with the "cold" storage on the slowest and cheapest disks.
+
+With this understood, it is **very** important to put time boundaries on your
+Splunk queries. The Drill plugin allows you to set default values in the
+configuration such that every query you run will be bounded by these
boundaries.
+Alternatively, you can set the time boundaries at query time. In either case,
+you will achieve the best performance when you ask Splunk for the smallest
+amount of data possible.
+
+## Understanding Drill's Data Model for Splunk
+
+Drill treats Splunk indexes as tables. Splunk's access model does not restrict
+to the catalog, but does restrict access to the actual data. It is therefore
+possible that you can see the names of indexes to which you do not have access.
+You can view the list of available indexes with a `SHOW TABLES IN splunk`
query.
+
+```
+apache drill> SHOW TABLES IN splunk;
++--------------+----------------+
+| TABLE_SCHEMA | TABLE_NAME |
++--------------+----------------+
+| splunk | summary |
+| splunk | splunklogger |
+| splunk | _thefishbucket |
+| splunk | _audit |
+| splunk | _internal |
+| splunk | _introspection |
+| splunk | main |
+| splunk | history |
+| splunk | _telemetry |
++--------------+----------------+
+9 rows selected (0.304 seconds)
+```
+
+To query Splunk from Drill, use the following format:
+
+```sql
+SELECT <fields>
+FROM splunk.<index>
+```
+
+## Bounding Your Queries
+
+When you learn to query Splunk via their interface, the first thing you learn
is
+to bound your queries so that they are looking at the shortest time span
+possible. When using Drill to query Splunk, it is advisable to do the same
+thing, and Drill offers two ways to accomplish this: via the configuration and
+at query time.
+
+### Bounding your Queries at Query Time
+
+The easiest way to bound your query is to do so at querytime via special
filters
+in the `WHERE` clause. There are two special fields, `earliestTime` and
+`latestTime` which can be set to bound the query. If they are not set, the
query
+will be bounded to the defaults set in the configuration. You can use any of
the
+time formats specified in
+[the Splunk
documentation](https://docs.splunk.com/Documentation/Splunk/8.0.3/SearchReference/SearchTimeModifiers)
+
+So if you wanted to see your data for the last 15 minutes, you could execute
the
+following query:
+
+```sql
+SELECT <fields>
+FROM splunk.<index>
+WHERE earliestTime='-15m' AND latestTime='now'
+```
+
+The variables set in a query override the defaults from the configuration.
+
+## Data Types
+
+Splunk does not have sophisticated data types and unfortunately does not
provide
+metadata from its query results. With the exception of the fields below, Drill
+will interpret all fields as `VARCHAR` and hence you will have to convert them
+to the appropriate data type at query time.
+
+#### Timestamp Fields
+
+- `_indextime`
+- `_time`
+
+#### Numeric Fields
+
+- `date_hour`
+- `date_mday`
+- `date_minute`
+- `date_second`
+- `date_year`
+- `linecount`
+
+### Nested Data
+
+Splunk has two different types of nested data which roughly map to Drill's
+`LIST` and `MAP` data types. Unfortunately, there is no easy way to identify
+whether a field is a nested field at querytime as Splunk does not provide any
+metadata and therefore all fields are treated as `VARCHAR`.
+
+However, Drill does have built in functions to easily convert Splunk
multifields
+into Drill `LIST` and `MAP` data types. For a LIST, simply use the
+`SPLIT(<field>, ' ')` function to split the field into a `LIST`.
+
+`MAP` data types are rendered as JSON in Splunk. Fortunately JSON can easily be
+parsed into a Drill Map by using the `convert_fromJSON()` function. The query
+below demonstrates how to convert a JSON column into a Drill `MAP`.
+
+```sql
+SELECT convert_fromJSON(_raw)
+FROM splunk.spl
+WHERE spl = '| makeresults
+| eval _raw="{\"pc\":{\"label\":\"PC\",\"count\":24,\"peak24\":12},\"ps3\":
+{\"label\":\"PS3\",\"count\":51,\"peak24\":10},\"xbox\":
+{\"label\":\"XBOX360\",\"count\":40,\"peak24\":11},\"xone\":
+{\"label\":\"XBOXONE\",\"count\":105,\"peak24\":99},\"ps4\":
+{\"label\":\"PS4\",\"count\":200,\"peak24\":80}}"'
+```
+
+### Selecting Fields
+
+When you execute a query in Drill for Splunk, the fields you select are pushed
+down to Splunk. Therefore, it will always be more efficient to explicitly
+specify fields to push down to Splunk rather than using `SELECT *` queries.
+
+### Special Fields
+
+There are several fields which can be included in a Drill query
+
+- `spl`: If you just want to send an SPL query to Splunk, this will do that.
+- `earliestTime`: Overrides the `earliestTime` setting in the configuration.
+- `latestTime`: Overrides the `latestTime` setting in the configuration.
+
+### Sorting Results
+
+Due to the nature of Splunk indexes, data will always be returned in reverse
+chronological order. Thus, sorting is not necessary if that is the desired
+order.
+
+## Sending Arbitrary SPL to Splunk
+
+There is a special table called `spl` which you can use to send arbitrary
+queries to Splunk. If you use this table, you must include a query in the `spl`
+filter as shown below:
+
+```sql
+SELECT *
+FROM splunk.spl
+WHERE spl='<your SPL query>'
+```