[1/2] phoenix git commit: PHOENIX-3600 Core MapReduce classes don't provide location info

2017-02-13 Thread jmahonin
Repository: phoenix
Updated Branches:
  refs/heads/4.x-HBase-1.1 dc3536e74 -> dbb0c1ea0


PHOENIX-3600 Core MapReduce classes don't provide location info

This mostly ports the functionality already present in the phoenix-hive MR
classes to the core MapReduce classes. It also adds a new configuration
parameter, 'phoenix.mapreduce.split.by.stats' (default: true), which creates
input splits based on the scans provided by statistics rather than only on
the region locations.


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/fc4f9783
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/fc4f9783
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/fc4f9783

Branch: refs/heads/4.x-HBase-1.1
Commit: fc4f97832b32c3c25cd8ad0a89a36393da32608b
Parents: dc3536e
Author: Josh Mahonin 
Authored: Mon Feb 13 10:55:06 2017 -0500
Committer: Josh Mahonin 
Committed: Mon Feb 13 11:03:33 2017 -0500

--
 .../phoenix/mapreduce/PhoenixInputFormat.java   | 69 ++--
 .../phoenix/mapreduce/PhoenixInputSplit.java| 23 ++-
 .../util/PhoenixConfigurationUtil.java  | 11 
 3 files changed, 96 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/phoenix/blob/fc4f9783/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
--
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
index df96c7b..14f7b94 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
@@ -21,14 +21,18 @@ import java.io.IOException;
 import java.sql.Connection;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.util.Collections;
 import java.util.List;
 import java.util.Properties;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.*;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RegionSizeCalculator;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
@@ -42,6 +46,7 @@ import 
org.apache.phoenix.iterate.MapReduceParallelScanGrouper;
 import org.apache.phoenix.jdbc.PhoenixStatement;
 import org.apache.phoenix.mapreduce.util.ConnectionUtil;
 import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
 import org.apache.phoenix.query.KeyRange;
 import org.apache.phoenix.util.PhoenixRuntime;
 
@@ -80,16 +85,72 @@ public class PhoenixInputFormat 
extends InputFormat allSplits = queryPlan.getSplits();
-final List splits = generateSplits(queryPlan,allSplits);
+final List splits = generateSplits(queryPlan, allSplits, 
configuration);
 return splits;
 }
 
-private List generateSplits(final QueryPlan qplan, final 
List splits) throws IOException {
+private List generateSplits(final QueryPlan qplan, final 
List splits, Configuration config) throws IOException {
 Preconditions.checkNotNull(qplan);
 Preconditions.checkNotNull(splits);
+
+// Get the RegionSizeCalculator
+org.apache.hadoop.hbase.client.Connection connection = 
ConnectionFactory.createConnection(config);
+RegionLocator regionLocator = 
connection.getRegionLocator(TableName.valueOf(qplan
+.getTableRef().getTable().getPhysicalName().toString()));
+RegionSizeCalculator sizeCalculator = new 
RegionSizeCalculator(regionLocator, connection
+.getAdmin());
+
+
 final List psplits = 
Lists.newArrayListWithExpectedSize(splits.size());
 for (List scans : qplan.getScans()) {
-psplits.add(new PhoenixInputSplit(scans));
+// Get the region location
+HRegionLocation location = regionLocator.getRegionLocation(
+scans.get(0).getStartRow(),
+false
+);
+
+String regionLocation = location.getHostname();
+
+// Get the region size
+long regionSize = sizeCalculator.getRegionSize(
+location.getRegionInfo().getRegionName()
+);
+
+// Generate splits based off statistics, or just region splits?
+boolean splitByStats = 

[1/2] phoenix git commit: PHOENIX-3600 Core MapReduce classes don't provide location info

2017-02-13 Thread jmahonin
Repository: phoenix
Updated Branches:
  refs/heads/4.x-HBase-0.98 44dc576cf -> c1027f17f


PHOENIX-3600 Core MapReduce classes don't provide location info

This mostly ports the functionality already present in the phoenix-hive MR
classes to the core MapReduce classes. It also adds a new configuration
parameter, 'phoenix.mapreduce.split.by.stats' (default: true), which creates
input splits based on the scans provided by statistics rather than only on
the region locations.


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/e1b1cd87
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/e1b1cd87
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/e1b1cd87

Branch: refs/heads/4.x-HBase-0.98
Commit: e1b1cd8733d7adfca3a17899630c73881af187f1
Parents: 44dc576
Author: Josh Mahonin 
Authored: Mon Feb 13 10:55:06 2017 -0500
Committer: Josh Mahonin 
Committed: Mon Feb 13 11:04:40 2017 -0500

--
 .../phoenix/mapreduce/PhoenixInputFormat.java   | 69 ++--
 .../phoenix/mapreduce/PhoenixInputSplit.java| 23 ++-
 .../util/PhoenixConfigurationUtil.java  | 11 
 3 files changed, 96 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/phoenix/blob/e1b1cd87/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
--
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
index df96c7b..14f7b94 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
@@ -21,14 +21,18 @@ import java.io.IOException;
 import java.sql.Connection;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.util.Collections;
 import java.util.List;
 import java.util.Properties;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.*;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RegionSizeCalculator;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
@@ -42,6 +46,7 @@ import 
org.apache.phoenix.iterate.MapReduceParallelScanGrouper;
 import org.apache.phoenix.jdbc.PhoenixStatement;
 import org.apache.phoenix.mapreduce.util.ConnectionUtil;
 import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
 import org.apache.phoenix.query.KeyRange;
 import org.apache.phoenix.util.PhoenixRuntime;
 
@@ -80,16 +85,72 @@ public class PhoenixInputFormat 
extends InputFormat allSplits = queryPlan.getSplits();
-final List splits = generateSplits(queryPlan,allSplits);
+final List splits = generateSplits(queryPlan, allSplits, 
configuration);
 return splits;
 }
 
-private List generateSplits(final QueryPlan qplan, final 
List splits) throws IOException {
+private List generateSplits(final QueryPlan qplan, final 
List splits, Configuration config) throws IOException {
 Preconditions.checkNotNull(qplan);
 Preconditions.checkNotNull(splits);
+
+// Get the RegionSizeCalculator
+org.apache.hadoop.hbase.client.Connection connection = 
ConnectionFactory.createConnection(config);
+RegionLocator regionLocator = 
connection.getRegionLocator(TableName.valueOf(qplan
+.getTableRef().getTable().getPhysicalName().toString()));
+RegionSizeCalculator sizeCalculator = new 
RegionSizeCalculator(regionLocator, connection
+.getAdmin());
+
+
 final List psplits = 
Lists.newArrayListWithExpectedSize(splits.size());
 for (List scans : qplan.getScans()) {
-psplits.add(new PhoenixInputSplit(scans));
+// Get the region location
+HRegionLocation location = regionLocator.getRegionLocation(
+scans.get(0).getStartRow(),
+false
+);
+
+String regionLocation = location.getHostname();
+
+// Get the region size
+long regionSize = sizeCalculator.getRegionSize(
+location.getRegionInfo().getRegionName()
+);
+
+// Generate splits based off statistics, or just region splits?
+boolean splitByStats = 

[1/2] phoenix git commit: PHOENIX-3600 Core MapReduce classes don't provide location info

2017-02-13 Thread jmahonin
Repository: phoenix
Updated Branches:
  refs/heads/master 41d6349bd -> 8f2d0fbc5


PHOENIX-3600 Core MapReduce classes don't provide location info

This mostly ports the functionality already present in the phoenix-hive MR
classes to the core MapReduce classes. It also adds a new configuration
parameter, 'phoenix.mapreduce.split.by.stats' (default: true), which creates
input splits based on the scans provided by statistics rather than only on
the region locations.


Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/267323da
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/267323da
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/267323da

Branch: refs/heads/master
Commit: 267323da8242fb6f0953c1a75cf96c5fde3d49ed
Parents: 41d6349
Author: Josh Mahonin 
Authored: Mon Feb 13 10:55:06 2017 -0500
Committer: Josh Mahonin 
Committed: Mon Feb 13 10:55:06 2017 -0500

--
 .../phoenix/mapreduce/PhoenixInputFormat.java   | 69 ++--
 .../phoenix/mapreduce/PhoenixInputSplit.java| 23 ++-
 .../util/PhoenixConfigurationUtil.java  | 11 
 3 files changed, 96 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/phoenix/blob/267323da/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
--
diff --git 
a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
 
b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
index df96c7b..14f7b94 100644
--- 
a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
+++ 
b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java
@@ -21,14 +21,18 @@ import java.io.IOException;
 import java.sql.Connection;
 import java.sql.SQLException;
 import java.sql.Statement;
+import java.util.Collections;
 import java.util.List;
 import java.util.Properties;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.HRegionLocation;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.*;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.RegionSizeCalculator;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
@@ -42,6 +46,7 @@ import 
org.apache.phoenix.iterate.MapReduceParallelScanGrouper;
 import org.apache.phoenix.jdbc.PhoenixStatement;
 import org.apache.phoenix.mapreduce.util.ConnectionUtil;
 import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
+import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil;
 import org.apache.phoenix.query.KeyRange;
 import org.apache.phoenix.util.PhoenixRuntime;
 
@@ -80,16 +85,72 @@ public class PhoenixInputFormat 
extends InputFormat allSplits = queryPlan.getSplits();
-final List splits = generateSplits(queryPlan,allSplits);
+final List splits = generateSplits(queryPlan, allSplits, 
configuration);
 return splits;
 }
 
-private List generateSplits(final QueryPlan qplan, final 
List splits) throws IOException {
+private List generateSplits(final QueryPlan qplan, final 
List splits, Configuration config) throws IOException {
 Preconditions.checkNotNull(qplan);
 Preconditions.checkNotNull(splits);
+
+// Get the RegionSizeCalculator
+org.apache.hadoop.hbase.client.Connection connection = 
ConnectionFactory.createConnection(config);
+RegionLocator regionLocator = 
connection.getRegionLocator(TableName.valueOf(qplan
+.getTableRef().getTable().getPhysicalName().toString()));
+RegionSizeCalculator sizeCalculator = new 
RegionSizeCalculator(regionLocator, connection
+.getAdmin());
+
+
 final List psplits = 
Lists.newArrayListWithExpectedSize(splits.size());
 for (List scans : qplan.getScans()) {
-psplits.add(new PhoenixInputSplit(scans));
+// Get the region location
+HRegionLocation location = regionLocator.getRegionLocation(
+scans.get(0).getStartRow(),
+false
+);
+
+String regionLocation = location.getHostname();
+
+// Get the region size
+long regionSize = sizeCalculator.getRegionSize(
+location.getRegionInfo().getRegionName()
+);
+
+// Generate splits based off statistics, or just region splits?
+boolean splitByStats = 
PhoenixConfigurationUtil.getSplitByStats(config);
+
+