incubator-hawq git commit: HAWQ-975. Added comments.

2016-08-19 Thread odiachenko
Repository: incubator-hawq
Updated Branches:
  refs/heads/HAWQ-967 0ce866737 -> 9c517d962


HAWQ-975. Added comments.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/9c517d96
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/9c517d96
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/9c517d96

Branch: refs/heads/HAWQ-967
Commit: 9c517d9629a9a3e149b62aa052fba70cc244891c
Parents: 0ce8667
Author: Oleksandr Diachenko 
Authored: Fri Aug 19 17:30:11 2016 -0700
Committer: Oleksandr Diachenko 
Committed: Fri Aug 19 17:30:11 2016 -0700

--
 src/backend/access/external/pxffilters.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/9c517d96/src/backend/access/external/pxffilters.c
--
diff --git a/src/backend/access/external/pxffilters.c b/src/backend/access/external/pxffilters.c
index bd7588b..405e6d7 100644
--- a/src/backend/access/external/pxffilters.c
+++ b/src/backend/access/external/pxffilters.c
@@ -611,7 +611,13 @@ char *serializePxfFilterQuals(List *quals)
 }
 
 
-
+/*
+ * Returns a list of attributes extracted from the given quals.
+ * Supports the AND, OR, and NOT boolean operations, and the
+ * =, <, <=, >, >=, IS NULL, IS NOT NULL, BETWEEN, and IN operators.
+ * The returned list may contain duplicates.
+ * The caller should release the memory once the result is no longer needed.
+ */
 List* extractPxfAttributes(List* quals)
 {
 
@@ -650,8 +656,13 @@ List* extractPxfAttributes(List* quals)
break;
}
default:
-   /* expression not supported */
-   elog(ERROR, "extractPxfAttributes: unsupported node tag %d, unable to extract column from WHERE clause", tag);
+   /*
+    * The node tag is not supported; erroring out here avoids the risk of:
+    * 1) returning false-positive tuples
+    * 2) being unable to join tables
+    * 3) other subtle failures
+    */
+   elog(ERROR, "extractPxfAttributes: unsupported node tag %d, unable to extract attribute from qualifier", tag);
break;
}
}
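
As a hedged illustration of the behavior the new comment documents, assuming a
PXF external table `ext_sales` with columns `id`, `amount`, and `region` (all
hypothetical names), a qualifier list built from a WHERE clause like the one
below would yield those three attributes, possibly with duplicates:

``` sql
-- Hypothetical query: every predicate uses an operation the comment lists
-- (AND/OR/NOT, =, IN, BETWEEN, IS NULL), so extractPxfAttributes() would
-- collect id, amount, and region from these quals.
SELECT id, amount
FROM ext_sales
WHERE (region = 'EMEA' OR region IN ('US', 'APAC'))
  AND amount BETWEEN 100 AND 500
  AND NOT (id IS NULL);
```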



[incubator-hawq] Git Push Summary

2016-08-19 Thread odiachenko
Repository: incubator-hawq
Updated Branches:
  refs/heads/HAWQ-953 [deleted] 4275ae363


incubator-hawq git commit: HAWQ-975. Added memory cleanup.

2016-08-19 Thread odiachenko
Repository: incubator-hawq
Updated Branches:
  refs/heads/HAWQ-967 f40b2918e -> 0ce866737


HAWQ-975. Added memory cleanup.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/0ce86673
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/0ce86673
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/0ce86673

Branch: refs/heads/HAWQ-967
Commit: 0ce86673718d99918ed0782aa1c4d3270d3d080c
Parents: f40b291
Author: Oleksandr Diachenko 
Authored: Fri Aug 19 15:10:57 2016 -0700
Committer: Oleksandr Diachenko 
Committed: Fri Aug 19 15:10:57 2016 -0700

--
 src/backend/access/external/pxfheaders.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0ce86673/src/backend/access/external/pxfheaders.c
--
diff --git a/src/backend/access/external/pxfheaders.c b/src/backend/access/external/pxfheaders.c
index c7b2ab4..49dd966 100644
--- a/src/backend/access/external/pxfheaders.c
+++ b/src/backend/access/external/pxfheaders.c
@@ -35,7 +35,7 @@ static void add_location_options_httpheader(CHURL_HEADERS headers, GPHDUri *gphd
 static char* prepend_x_gp(const char* key);
 static void add_delegation_token_headers(CHURL_HEADERS headers, PxfInputData *inputData);
 static void add_remote_credentials(CHURL_HEADERS headers);
-static void add_projection_desc_httpheader(CHURL_HEADERS headers, ProjectionInfo *projInfo, List *whereAttributes);
+static void add_projection_desc_httpheader(CHURL_HEADERS headers, ProjectionInfo *projInfo, List *qualsAttributes);
 
 /* 
  * Add key/value pairs to connection header. 
@@ -65,9 +65,9 @@ void build_http_header(PxfInputData *input)

if (proj_info != NULL && proj_info->pi_isVarList)
{
-   List* whereAttributes = extractPxfAttributes(input->quals);
+   List* qualsAttributes = extractPxfAttributes(input->quals);
 
-   add_projection_desc_httpheader(headers, proj_info, whereAttributes);
+   add_projection_desc_httpheader(headers, proj_info, qualsAttributes);
}
 
/* GP cluster configuration */
@@ -169,7 +169,7 @@ static void add_tuple_desc_httpheader(CHURL_HEADERS headers, Relation rel)
pfree(formatter.data);
 }
 
-static void add_projection_desc_httpheader(CHURL_HEADERS headers, ProjectionInfo *projInfo, List *whereAttributes) {
+static void add_projection_desc_httpheader(CHURL_HEADERS headers, ProjectionInfo *projInfo, List *qualsAttributes) {
 int i;
 char long_number[sizeof(int32) * 8];
 int *varNumbers = projInfo->pi_varNumbers;
@@ -177,7 +177,7 @@ static void add_projection_desc_httpheader(CHURL_HEADERS headers, ProjectionInfo
 initStringInfo(&formatter);
 
 /* Convert the number of projection columns to a string */
-pg_ltoa(list_length(projInfo->pi_targetlist) + list_length(whereAttributes), long_number);
+pg_ltoa(list_length(projInfo->pi_targetlist) + list_length(qualsAttributes), long_number);
 churl_headers_append(headers, "X-GP-ATTRS-PROJ", long_number);
 
 for(i = 0; i < list_length(projInfo->pi_targetlist); i++) {
@@ -191,7 +191,7 @@ static void add_projection_desc_httpheader(CHURL_HEADERS headers, ProjectionInfo
 
ListCell *attribute = NULL;
 
-   foreach(attribute, whereAttributes)
+   foreach(attribute, qualsAttributes)
{
AttrNumber attrNumber = lfirst_int(attribute);
 
@@ -203,6 +203,7 @@ static void add_projection_desc_httpheader(CHURL_HEADERS headers, ProjectionInfo
}
 
 
+list_free(qualsAttributes);
 pfree(formatter.data);
 }
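
A hedged sketch of the effect of this change, with hypothetical object names:
when the projection list and the qualifiers touch different columns, the
`X-GP-ATTRS-PROJ` count now covers both sets, and the attribute list is freed
once the headers are built.

``` sql
-- Hypothetical query: only "amount" is projected, but "region" appears in
-- the qualifier, so the projection header counts both attributes.
SELECT amount
FROM ext_sales
WHERE region = 'EMEA';
```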
 



incubator-hawq git commit: HAWQ-975. Added support for IN, BETWEEN, LIKE.

2016-08-19 Thread odiachenko
Repository: incubator-hawq
Updated Branches:
  refs/heads/HAWQ-967 d0e541938 -> f40b2918e


HAWQ-975. Added support for IN, BETWEEN, LIKE.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/f40b2918
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/f40b2918
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/f40b2918

Branch: refs/heads/HAWQ-967
Commit: f40b2918eca1fb1293781354cef8c1d216b07b30
Parents: d0e5419
Author: Oleksandr Diachenko 
Authored: Fri Aug 19 14:40:22 2016 -0700
Committer: Oleksandr Diachenko 
Committed: Fri Aug 19 14:40:22 2016 -0700

--
 src/backend/access/external/pxffilters.c | 44 ---
 1 file changed, 27 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/f40b2918/src/backend/access/external/pxffilters.c
--
diff --git a/src/backend/access/external/pxffilters.c b/src/backend/access/external/pxffilters.c
index 4ec8dde..bd7588b 100644
--- a/src/backend/access/external/pxffilters.c
+++ b/src/backend/access/external/pxffilters.c
@@ -38,6 +38,7 @@ static char* pxf_serialize_filter_list(List *filters);
 static bool opexpr_to_pxffilter(OpExpr *expr, PxfFilterDesc *filter);
 static bool supported_filter_type(Oid type);
 static void const_to_str(Const *constval, StringInfo buf);
+static List* append_attr_from_var(Var* var, List* attrs);
 
 /*
  * All supported HAWQ operators, and their respective HFDS operator code.
@@ -196,7 +197,7 @@ pxf_make_filter_list(List *quals)
elog(DEBUG5, "pxf_make_filter_list: node tag %d (T_BoolExpr), bool node type %d %s",
tag, boolType, boolType==AND_EXPR ? "(AND_EXPR)" : "");
 
-   /* only AND_EXPR is supported for filter push-down*/
+   /* only AND_EXPR is supported */
if (expr->boolop == AND_EXPR)
{
List *inner_result = pxf_make_filter_list(expr->args);
@@ -451,7 +452,8 @@ opexpr_to_pxffilter(OpExpr *expr, PxfFilterDesc *filter)
return false;
 }
 
-List* append_attr_from_var(Var* var, List* attrs)
+static List*
+append_attr_from_var(Var* var, List* attrs)
 {
AttrNumber varattno = var->varattno;
/* system attr not supported */
@@ -462,7 +464,7 @@ List* append_attr_from_var(Var* var, List* attrs)
 }
 
 static List*
-get_attrs_from_opexpr(OpExpr *expr)
+get_attrs_from_expr(Expr *expr)
 {
Node*leftop = NULL;
Node*rightop= NULL;
@@ -471,10 +473,17 @@ get_attrs_from_opexpr(OpExpr *expr)
if ((!expr))
return attrs;
 
-   leftop = get_leftop((Expr*)expr);
-   rightop = get_rightop((Expr*)expr);
+   if (IsA(expr, OpExpr))
+   {
+   leftop = get_leftop(expr);
+   rightop = get_rightop(expr);
+   } else if (IsA(expr, ScalarArrayOpExpr))
+   {
+   ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) expr;
+   leftop = (Node *) linitial(saop->args);
+   rightop = (Node *) lsecond(saop->args);
+   }
 
-   /* arguments must be VAR and CONST */
if (IsA(leftop, Var))
{
attrs = append_attr_from_var((Var *) leftop, attrs);
@@ -620,28 +629,29 @@ List* extractPxfAttributes(List* quals)
switch (tag)
{
case T_OpExpr:
+   case T_ScalarArrayOpExpr:
{
-   OpExpr  *expr   = (OpExpr *) node;
-   List    *attrs  = get_attrs_from_opexpr(expr);
-   attributes = lappend(attributes, attrs);
-   break;
-   }
-   case T_NullTest:
-   {
-   NullTest    *expr = (NullTest *) node;
-   attributes = append_attr_from_var((Var *) expr->arg, attributes);
+   Expr* expr = (Expr *) node;
+   List*    attrs = get_attrs_from_expr(expr);
+   attributes = list_concat(attributes, attrs);
break;
}
case T_BoolExpr:
{
-   BoolExpr*expr = (BoolExpr *) node;
+   BoolExpr* expr = (BoolExpr *) node;
List *inner_result = extractPxfAttributes(
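
For context, a sketch of the qualifier shapes this commit handles, using a
hypothetical table:

``` sql
-- region IN (...)            -> a T_ScalarArrayOpExpr node
-- amount BETWEEN 100 AND 500 -> rewritten by the parser into >= AND <= (T_OpExpr)
-- name LIKE 'A%'             -> an ordinary T_OpExpr on the ~~ operator
SELECT id
FROM ext_sales
WHERE region IN ('US', 'EMEA')
  AND amount BETWEEN 100 AND 500
  AND name LIKE 'A%';
```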

[4/5] incubator-hawq-docs git commit: Removes heap table statement, updates [#128180963]

2016-08-19 Thread yozie
Removes heap table statement, updates [#128180963]


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/commit/42fa1bc9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/tree/42fa1bc9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/diff/42fa1bc9

Branch: refs/heads/develop
Commit: 42fa1bc9363fc6104fe575e033129a1d5701c185
Parents: 2349cea
Author: Jane Beckman 
Authored: Thu Aug 18 11:39:47 2016 -0700
Committer: David Yozie 
Committed: Fri Aug 19 10:47:57 2016 -0700

--
 reference/sql/CREATE-TABLE.html.md.erb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/42fa1bc9/reference/sql/CREATE-TABLE.html.md.erb
--
diff --git a/reference/sql/CREATE-TABLE.html.md.erb b/reference/sql/CREATE-TABLE.html.md.erb
index 5d1098b..99ff35e 100644
--- a/reference/sql/CREATE-TABLE.html.md.erb
+++ b/reference/sql/CREATE-TABLE.html.md.erb
@@ -228,7 +228,7 @@ The following storage options are available:
 
 **bucketnum** — Set to the number of hash buckets to be used in creating a 
hash-distributed table, specified as an integer greater than 0 and no more than 
the value of `default_hash_table_bucket_number`. The default when the table is 
created is 6 times the segment count. However, explicitly setting the bucket 
number when creating a hash table is recommended.
 
-**ORIENTATION** — Set to `row` (the default) for row-oriented storage, or 
parquet. The parquet column-oriented format can be more efficient for 
large-scale queries. This option is only valid if `APPENDONLY=TRUE`. 
Heap-storage tables can only be row-oriented.
+**ORIENTATION** — Set to `row` (the default) for row-oriented storage, or 
parquet. The parquet column-oriented format can be more efficient for 
large-scale queries. This option is only valid if `APPENDONLY=TRUE`. 
 
 **COMPRESSTYPE** — Set to `ZLIB`, `SNAPPY`, or `GZIP` to specify the type of 
compression used. `ZLIB` provides more compact compression ratios at lower 
speeds. Parquet tables support `SNAPPY` and `GZIP` compression. Append-only 
tables support `SNAPPY` and `ZLIB` compression.  This option is valid only if 
`APPENDONLY=TRUE`.
 
@@ -328,8 +328,8 @@ Using `SNAPPY` compression with parquet files is recommended for best performance
 
 **Memory occupation**: When inserting or loading data to a parquet table, the 
whole rowgroup is stored in physical memory until its size exceeds the 
threshold or the `INSERT` operation ends, at which point the entire rowgroup 
is flushed to disk. Also, at the beginning of the `INSERT` operation, each 
column is pre-allocated a page buffer. For the first rowgroup, the 
pre-allocated page buffer size should be `min(pageSizeLimit, 
rowgroupSizeLimit/estimatedColumnWidth/estimatedRecordWidth)`; for each 
following rowgroup, it should be `min(pageSizeLimit, actualColumnChunkSize in 
last rowgroup * 1.05)`, where 1.05 is the estimated scaling factor. When 
reading data from a parquet table, the requested columns of the rowgroup are 
loaded into memory; 8 MB of memory is allocated by default. Ensure that memory 
occupation does not exceed physical memory when setting `ROWGROUPSIZE` or 
`PAGESIZE`, otherwise you may encounter an out of memory error. 
 
-**Batch vs. individual inserts**
-Only batch loading should be used with parquet files. Repeated individual 
inserts can result in bloated footers.
+**Bulk vs. trickle loads**
+Only bulk loads are recommended for use with parquet tables. Trickle loads can 
result in bloated footers and larger data files.
 
 ## Parquet Examples
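
The examples themselves are cut off in this digest; a minimal sketch
consistent with the storage options described above, using hypothetical names:

``` sql
-- Illustrative only: an append-only parquet table with the SNAPPY
-- compression recommended above.
CREATE TABLE sales_parquet (id int, amount float8)
WITH (APPENDONLY=true, ORIENTATION=parquet, COMPRESSTYPE=snappy);
```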
 



[3/5] incubator-hawq-docs git commit: Updates [#128508767]

2016-08-19 Thread yozie
Updates [#128508767]


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/commit/2349cea0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/tree/2349cea0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/diff/2349cea0

Branch: refs/heads/develop
Commit: 2349cea029c3c8638c3f5cf8842b8ea9ca65c426
Parents: e464585
Author: Jane Beckman 
Authored: Wed Aug 17 15:18:09 2016 -0700
Committer: David Yozie 
Committed: Fri Aug 19 10:47:48 2016 -0700

--
 reference/cli/admin_utilities/hawqstate.html.md.erb | 8 
 1 file changed, 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/2349cea0/reference/cli/admin_utilities/hawqstate.html.md.erb
--
diff --git a/reference/cli/admin_utilities/hawqstate.html.md.erb b/reference/cli/admin_utilities/hawqstate.html.md.erb
index d272892..3927442 100644
--- a/reference/cli/admin_utilities/hawqstate.html.md.erb
+++ b/reference/cli/admin_utilities/hawqstate.html.md.erb
@@ -9,10 +9,8 @@ Shows the status of a running HAWQ system.
 ``` pre
 hawq state 
  [-b]
- [-d <master_data_directory> | --datadir <master_data_directory>]
 [-l <log_directory> | --logdir <log_directory>]
 [(-v | --verbose) | (-q | --quiet)]  
- [--hawqhome <home_directory>]
  
 hawq state [-h | --help]
 ```
@@ -32,12 +30,6 @@ The `hawq state` utility displays information about a running HAWQ instance. A H
 -b (brief status)  
 Display a brief summary of the state of the HAWQ system. This is the 
default mode.
 
--d, -\\\-datadir \<master_data_directory\>  
-Status of the master data directory.
-
--\\\-hawqhome \<home_directory\>  
-Display details of the designated home data directory if `$GPHOME` is not 
defined. `$GPHOME` is used by default in a standard installation.
-
 -l, -\\\-logdir \<log_directory\>  
 Specifies the directory to check for logfiles. The default is 
`$GPHOME/hawqAdminLogs`. 
 



[1/5] incubator-hawq-docs git commit: pxf/hive reorganize syntax example and chg some params [#128450965]

2016-08-19 Thread yozie
Repository: incubator-hawq-docs
Updated Branches:
  refs/heads/develop 6e9f482ad -> 1f6714a31


pxf/hive reorganize syntax example and chg some params [#128450965]


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/commit/4dfb8cd5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/tree/4dfb8cd5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/diff/4dfb8cd5

Branch: refs/heads/develop
Commit: 4dfb8cd51445dc9c3cc8f097b46cb0863f1fa596
Parents: 6e9f482
Author: Lisa Owen 
Authored: Wed Aug 17 08:42:22 2016 -0700
Committer: David Yozie 
Committed: Fri Aug 19 10:47:35 2016 -0700

--
 pxf/HivePXF.html.md.erb | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/4dfb8cd5/pxf/HivePXF.html.md.erb
--
diff --git a/pxf/HivePXF.html.md.erb b/pxf/HivePXF.html.md.erb
index a7160d3..efac11e 100644
--- a/pxf/HivePXF.html.md.erb
+++ b/pxf/HivePXF.html.md.erb
@@ -72,9 +72,8 @@ PXF has three built-in profiles for Hive tables:
 -   HiveRC
 -   HiveText
 
-The Hive profile works with any Hive storage type. Use HiveRC and HiveText to 
query RC and Text formats respectively. The HiveRC and HiveText profiles are 
faster than the generic Hive profile. When using the HiveRC and HiveText 
profiles, you must specify a DELIMITER option in the LOCATION clause. See 
[Using Profiles to Read and Write 
Data](ReadWritePXF.html#readingandwritingdatawithpxf) for more information on 
profiles.
-
-The following example creates a readable HAWQ external table representing a 
Hive table named `/user/eddie/test` using the PXF Hive profile:
+The Hive profile works with any Hive storage type. 
+The following example creates a readable HAWQ external table representing a 
Hive table named `accessories` in the `inventory` Hive database using the PXF 
Hive profile:
 
 ``` shell
 $ psql -d postgres
@@ -82,10 +81,14 @@ $ psql -d postgres
 
 ``` sql
 postgres=# CREATE EXTERNAL TABLE hivetest(id int, newid int)
-LOCATION ('pxf://namenode:51200/hive-db-name.test?PROFILE=Hive')
+LOCATION ('pxf://namenode:51200/inventory.accessories?PROFILE=Hive')
 FORMAT 'custom' (formatter='pxfwritable_import');
 ```
 
+
+Use HiveRC and HiveText to query RC and Text formats respectively. The HiveRC 
and HiveText profiles are faster than the generic Hive profile. When using the 
HiveRC and HiveText profiles, you must specify a DELIMITER option in the 
LOCATION clause. See [Using Profiles to Read and Write 
Data](ReadWritePXF.html#readingandwritingdatawithpxf) for more information on 
profiles.
+
+
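
A hedged sketch of the HiveText case described above; the hex-escaped
DELIMITER encoding and all object names are assumptions, not taken from this
commit:

``` sql
-- Assumed HiveText usage: the delimiter appears in the LOCATION string and
-- is repeated in the FORMAT clause.
CREATE EXTERNAL TABLE hivetext_test(id int, name text)
LOCATION ('pxf://namenode:51200/inventory.accessories?PROFILE=HiveText&DELIMITER=\x2C')
FORMAT 'TEXT' (delimiter=E',');
```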
 ### Hive Complex Types
 
 PXF tables support Hive data types that are not primitive types. The supported 
Hive complex data types are array, struct, map, and union. This Hive `CREATE 
TABLE` statement, for example, creates a table with each of these complex data 
types:
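
The statement itself is cut off in this digest; a minimal hypothetical Hive
DDL consistent with the description (column names invented):

``` sql
-- Covers each complex type named above: array, struct, map, and union.
CREATE TABLE complex_types_test (
  tags       ARRAY<string>,
  address    STRUCT<street:string, city:string>,
  properties MAP<string, int>,
  misc       UNIONTYPE<int, string>
);
```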



[5/5] incubator-hawq-docs git commit: Clarify PXF segment control

2016-08-19 Thread yozie
Clarify PXF segment control


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/commit/1f6714a3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/tree/1f6714a3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/diff/1f6714a3

Branch: refs/heads/develop
Commit: 1f6714a318255596fb6dbd15d3a49866d753294b
Parents: 42fa1bc
Author: Jane Beckman 
Authored: Thu Aug 18 16:55:24 2016 -0700
Committer: David Yozie 
Committed: Fri Aug 19 10:48:04 2016 -0700

--
 bestpractices/general_bestpractices.html.md.erb | 1 +
 ddl/ddl-table.html.md.erb   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/1f6714a3/bestpractices/general_bestpractices.html.md.erb
--
diff --git a/bestpractices/general_bestpractices.html.md.erb b/bestpractices/general_bestpractices.html.md.erb
index 3d991cd..6c663c3 100644
--- a/bestpractices/general_bestpractices.html.md.erb
+++ b/bestpractices/general_bestpractices.html.md.erb
@@ -17,6 +17,7 @@ When using HAWQ, adhere to the following guidelines for best results:
 -   **Available resources**. Resources available at query time. If more 
resources are available in the resource queue, the resources will be used.
-   **Hash table and bucket number**. If the query involves only 
hash-distributed tables, the bucket number (bucketnum) configured for all of 
the hash tables is the same, and the size of any randomly distributed table in 
the query is no more than 1.5 times larger than the size of the hash tables, 
then the query's parallelism is fixed (equal to the hash table bucket number). 
Otherwise, the number of virtual segments depends on the query's cost, and 
hash-distributed table queries will behave like queries on randomly 
distributed tables.
-   **Query Type**: For queries with some user-defined functions, or for 
external tables where calculating resource costs is difficult, the number 
of virtual segments is controlled by the `hawq_rm_nvseg_perquery_limit` and 
`hawq_rm_nvseg_perquery_perseg_limit` parameters, as well as by the ON clause 
and the location list of external tables. If the query has a hash result table 
(e.g. `INSERT into hash_table`), the number of virtual segments must 
be equal to the bucket number of the resulting hash table. If the query is 
performed in utility mode, such as for `COPY` and `ANALYZE` operations, the 
virtual segment number is calculated by different policies, which will be 
explained later in this section.
+-   **PXF**: PXF external tables use the 
`default_hash_table_bucket_number` parameter, not the 
`hawq_rm_nvseg_perquery_perseg_limit` parameter, to control the number of 
virtual segments. 
 
 See [Query Performance](../query/query-performance.html#topic38) for more 
details.
 

http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/1f6714a3/ddl/ddl-table.html.md.erb
--
diff --git a/ddl/ddl-table.html.md.erb b/ddl/ddl-table.html.md.erb
index a29409c..7120031 100644
--- a/ddl/ddl-table.html.md.erb
+++ b/ddl/ddl-table.html.md.erb
@@ -68,7 +68,7 @@ All HAWQ tables are distributed. The default is `DISTRIBUTED RANDOMLY` \(round-robin\)
 
 Randomly distributed tables have benefits over hash distributed tables. For 
example, after expansion, HAWQ's elasticity feature lets it automatically use 
more resources without needing to redistribute the data. For extremely large 
tables, redistribution is very expensive. Also, data locality for randomly 
distributed tables is better, especially after the underlying HDFS 
redistributes its data during rebalancing or because of data node failures. 
This is quite common when the cluster is large.
 
-However, hash distributed tables can be faster than randomly distributed 
tables. For example, for TPCH queries, where there are several queries, HASH 
distributed tables can have performance benefits. Choose a distribution policy 
that best suits your application scenario. When you `CREATE TABLE`, you can 
also specify the `bucketnum` option. The `bucketnum` determines the number of 
hash buckets used in creating a hash-distributed table or for pxf external 
table intermediate processing. The number of buckets also affects how many 
virtual segments will be created when processing this data. The bucketnumber of 
a gpfdist external table is the number of gpfdist location, and the 
bucketnumber of a command external table is `ON #num`.
+However, hash distributed tables can be faster than randomly distributed 
tables. For example, for TPCH queries, where there are several queries, HASH 
dis
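
As a hedged illustration of the `bucketnum` option discussed in this hunk
(names and the bucket count are arbitrary):

``` sql
-- Illustrative: an explicit bucket count for a hash-distributed table;
-- bucketnum must be greater than 0 and no more than
-- default_hash_table_bucket_number.
CREATE TABLE orders_hash (order_id int, total float8)
WITH (bucketnum=8)
DISTRIBUTED BY (order_id);
```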

[2/5] incubator-hawq-docs git commit: enhance pxf/hive database info [#128450965]

2016-08-19 Thread yozie
enhance pxf/hive database info [#128450965]


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/commit/e464585d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/tree/e464585d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/diff/e464585d

Branch: refs/heads/develop
Commit: e464585d7ec821c375c89e822aefac90ba429173
Parents: 4dfb8cd
Author: Lisa Owen 
Authored: Wed Aug 17 13:37:51 2016 -0700
Committer: David Yozie 
Committed: Fri Aug 19 10:47:43 2016 -0700

--
 pxf/HivePXF.html.md.erb | 3 +++
 1 file changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/e464585d/pxf/HivePXF.html.md.erb
--
diff --git a/pxf/HivePXF.html.md.erb b/pxf/HivePXF.html.md.erb
index efac11e..db3e53c 100644
--- a/pxf/HivePXF.html.md.erb
+++ b/pxf/HivePXF.html.md.erb
@@ -64,6 +64,9 @@ where `<pxf parameters>` is:
  | PROFILE=profile-name
 ```
 
+
+If `hive-db-name` is omitted, PXF will default to the Hive `default` database.
+
 **Note:** The port is the connection port for the PXF service. If the port is 
omitted, PXF assumes that High Availability (HA) is enabled and connects to the 
HA name service port, 51200 by default. The HA name service port can be changed 
by setting the pxf\_service\_port configuration parameter.
 
 PXF has three built-in profiles for Hive tables:
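
A sketch of the default-database behavior added above, with hypothetical
object names:

``` sql
-- With no hive-db-name in the path, "accessories" resolves to
-- default.accessories in Hive.
CREATE EXTERNAL TABLE hivedefault_test(id int, newid int)
LOCATION ('pxf://namenode:51200/accessories?PROFILE=Hive')
FORMAT 'custom' (formatter='pxfwritable_import');
```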



incubator-hawq-docs git commit: Updates [#128377837]

2016-08-19 Thread yozie
Repository: incubator-hawq-docs
Updated Branches:
  refs/heads/develop e09d51086 -> 6e9f482ad


Updates [#128377837]


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/commit/6e9f482a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/tree/6e9f482a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/diff/6e9f482a

Branch: refs/heads/develop
Commit: 6e9f482adaf9734f8d8dd6d699dbad61d5aa6355
Parents: e09d510
Author: Jane Beckman 
Authored: Mon Aug 15 16:41:07 2016 -0700
Committer: David Yozie 
Committed: Fri Aug 19 10:46:20 2016 -0700

--
 .../admin_utilities/hawqregister.html.md.erb| 27 +---
 1 file changed, 18 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/incubator-hawq-docs/blob/6e9f482a/reference/cli/admin_utilities/hawqregister.html.md.erb
--
diff --git a/reference/cli/admin_utilities/hawqregister.html.md.erb b/reference/cli/admin_utilities/hawqregister.html.md.erb
index d08558f..1eeaa74 100644
--- a/reference/cli/admin_utilities/hawqregister.html.md.erb
+++ b/reference/cli/admin_utilities/hawqregister.html.md.erb
@@ -11,6 +11,10 @@ hawq register <database> <tablename> <filepath>
 [-h <hostname>] 
 [-p <port>] 
 [-U <username>] 
+ [-d <database>]
+ [-t <tablename>] 
+ [-f <filepath>] 
+ [-c <config_file>]  
 hawq register help | -? 
 hawq register --version
 ```
@@ -50,15 +54,6 @@ The following HIVE data types cannot be converted to HAWQ equivalents: timestamp
 
 **Connection Options**
 
-\<database\>  
-The database to register the parquet HDFS data into.
- 
-\<tablename\> 
-The HAWQ table that will store the parquet data. The table cannot use hash 
distribution: only tables using random distribution can be registered into 
HAWQ.
-
-\<filepath\>
-The path of the file or directory containing the files to be 
registered.
-
 -h \<hostname\> 
 Specifies the host name of the machine on which the HAWQ master database 
server is running. If not specified, reads from the environment variable 
`$PGHOST` or defaults to `localhost`.
 
@@ -68,6 +63,20 @@ The following HIVE data types cannot be converted to HAWQ equivalents: timestamp
 -U \<username\>  
 The database role name to connect as. If not specified, reads from the 
environment variable `$PGUSER` or defaults to the current system user name.
 
+-d \<database\>, --database \<database\>  
+The database to register the parquet HDFS data into. The default is 
`postgres`.
+ 
+-t \<tablename\>, --tablename \<tablename\> 
+The HAWQ table that will store the parquet data. The table cannot use hash 
distribution: only tables using random distribution can be registered into 
HAWQ.
+
+-f \<filepath\>, --filepath \<filepath\>
+The path of the file or directory in HDFS containing the files to be 
registered.
+
+-c \<config_file\>, --config \<config_file\>  
+Registers a YAML-format configuration file into HAWQ.
+
+
+
 ## Examples
 
 This example shows how to register a HIVE-generated parquet file in HDFS into 
the table `parquet_table` in HAWQ, which is in the database named `postgres`. 
The file path of the HIVE-generated file is 
`hdfs://localhost:8020/temp/hive.paq`.
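
The example command is cut off in this digest; a hypothetical invocation,
assuming the option spellings from the synopsis above:

``` shell
# Register the HDFS parquet file into parquet_table in database postgres.
hawq register -d postgres -f hdfs://localhost:8020/temp/hive.paq -t parquet_table
```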