[1/2] phoenix git commit: PHOENIX-3600 Core MapReduce classes don't provide location info
Repository: phoenix Updated Branches: refs/heads/4.x-HBase-1.1 dc3536e74 -> dbb0c1ea0 PHOENIX-3600 Core MapReduce classes don't provide location info This mostly just ports the same functionality in the phoenix-hive MR classes to the main classes. Adds a new configuration parameter 'phoenix.mapreduce.split.by.stats', defaulting to true, to create input splits based off the scans provided by statistics, not just the region locations. Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/fc4f9783 Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/fc4f9783 Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/fc4f9783 Branch: refs/heads/4.x-HBase-1.1 Commit: fc4f97832b32c3c25cd8ad0a89a36393da32608b Parents: dc3536e Author: Josh Mahonin Authored: Mon Feb 13 10:55:06 2017 -0500 Committer: Josh Mahonin Committed: Mon Feb 13 11:03:33 2017 -0500 -- .../phoenix/mapreduce/PhoenixInputFormat.java | 69 ++-- .../phoenix/mapreduce/PhoenixInputSplit.java | 23 ++- .../util/PhoenixConfigurationUtil.java | 11 3 files changed, 96 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/phoenix/blob/fc4f9783/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java -- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java index df96c7b..14f7b94 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java @@ -21,14 +21,18 @@ import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; import java.sql.Statement; +import java.util.Collections; import java.util.List; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.RegionSizeCalculator; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; @@ -42,6 +46,7 @@ import org.apache.phoenix.iterate.MapReduceParallelScanGrouper; import org.apache.phoenix.jdbc.PhoenixStatement; import org.apache.phoenix.mapreduce.util.ConnectionUtil; import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil; +import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil; import org.apache.phoenix.query.KeyRange; import org.apache.phoenix.util.PhoenixRuntime; @@ -80,16 +85,72 @@ public class PhoenixInputFormat extends InputFormat allSplits = queryPlan.getSplits(); -final List splits = generateSplits(queryPlan,allSplits); +final List splits = generateSplits(queryPlan, allSplits, configuration); return splits; } -private List generateSplits(final QueryPlan qplan, final List splits) throws IOException { +private List generateSplits(final QueryPlan qplan, final List splits, Configuration config) throws IOException { Preconditions.checkNotNull(qplan); Preconditions.checkNotNull(splits); + +// Get the RegionSizeCalculator +org.apache.hadoop.hbase.client.Connection connection = ConnectionFactory.createConnection(config); +RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan +.getTableRef().getTable().getPhysicalName().toString())); +RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection +.getAdmin()); + + final List psplits = Lists.newArrayListWithExpectedSize(splits.size()); for (List scans : qplan.getScans()) { -psplits.add(new PhoenixInputSplit(scans)); +// Get the region location +HRegionLocation location = 
regionLocator.getRegionLocation( +scans.get(0).getStartRow(), +false +); + +String regionLocation = location.getHostname(); + +// Get the region size +long regionSize = sizeCalculator.getRegionSize( +location.getRegionInfo().getRegionName() +); + +// Generate splits based off statistics, or just region splits? +boolean splitByStats =
[1/2] phoenix git commit: PHOENIX-3600 Core MapReduce classes don't provide location info
Repository: phoenix Updated Branches: refs/heads/4.x-HBase-0.98 44dc576cf -> c1027f17f PHOENIX-3600 Core MapReduce classes don't provide location info This mostly just ports the same functionality in the phoenix-hive MR classes to the main classes. Adds a new configuration parameter 'phoenix.mapreduce.split.by.stats', defaulting to true, to create input splits based off the scans provided by statistics, not just the region locations. Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/e1b1cd87 Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/e1b1cd87 Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/e1b1cd87 Branch: refs/heads/4.x-HBase-0.98 Commit: e1b1cd8733d7adfca3a17899630c73881af187f1 Parents: 44dc576 Author: Josh Mahonin Authored: Mon Feb 13 10:55:06 2017 -0500 Committer: Josh Mahonin Committed: Mon Feb 13 11:04:40 2017 -0500 -- .../phoenix/mapreduce/PhoenixInputFormat.java | 69 ++-- .../phoenix/mapreduce/PhoenixInputSplit.java | 23 ++- .../util/PhoenixConfigurationUtil.java | 11 3 files changed, 96 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/phoenix/blob/e1b1cd87/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java -- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java index df96c7b..14f7b94 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java @@ -21,14 +21,18 @@ import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; import java.sql.Statement; +import java.util.Collections; import java.util.List; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.RegionSizeCalculator; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; @@ -42,6 +46,7 @@ import org.apache.phoenix.iterate.MapReduceParallelScanGrouper; import org.apache.phoenix.jdbc.PhoenixStatement; import org.apache.phoenix.mapreduce.util.ConnectionUtil; import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil; +import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil; import org.apache.phoenix.query.KeyRange; import org.apache.phoenix.util.PhoenixRuntime; @@ -80,16 +85,72 @@ public class PhoenixInputFormat extends InputFormat allSplits = queryPlan.getSplits(); -final List splits = generateSplits(queryPlan,allSplits); +final List splits = generateSplits(queryPlan, allSplits, configuration); return splits; } -private List generateSplits(final QueryPlan qplan, final List splits) throws IOException { +private List generateSplits(final QueryPlan qplan, final List splits, Configuration config) throws IOException { Preconditions.checkNotNull(qplan); Preconditions.checkNotNull(splits); + +// Get the RegionSizeCalculator +org.apache.hadoop.hbase.client.Connection connection = ConnectionFactory.createConnection(config); +RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan +.getTableRef().getTable().getPhysicalName().toString())); +RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection +.getAdmin()); + + final List psplits = Lists.newArrayListWithExpectedSize(splits.size()); for (List scans : qplan.getScans()) { -psplits.add(new PhoenixInputSplit(scans)); +// Get the region location +HRegionLocation location = 
regionLocator.getRegionLocation( +scans.get(0).getStartRow(), +false +); + +String regionLocation = location.getHostname(); + +// Get the region size +long regionSize = sizeCalculator.getRegionSize( +location.getRegionInfo().getRegionName() +); + +// Generate splits based off statistics, or just region splits? +boolean splitByStats =
[1/2] phoenix git commit: PHOENIX-3600 Core MapReduce classes don't provide location info
Repository: phoenix Updated Branches: refs/heads/master 41d6349bd -> 8f2d0fbc5 PHOENIX-3600 Core MapReduce classes don't provide location info This mostly just ports the same functionality in the phoenix-hive MR classes to the main classes. Adds a new configuration parameter 'phoenix.mapreduce.split.by.stats', defaulting to true, to create input splits based off the scans provided by statistics, not just the region locations. Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/267323da Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/267323da Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/267323da Branch: refs/heads/master Commit: 267323da8242fb6f0953c1a75cf96c5fde3d49ed Parents: 41d6349 Author: Josh Mahonin Authored: Mon Feb 13 10:55:06 2017 -0500 Committer: Josh Mahonin Committed: Mon Feb 13 10:55:06 2017 -0500 -- .../phoenix/mapreduce/PhoenixInputFormat.java | 69 ++-- .../phoenix/mapreduce/PhoenixInputSplit.java | 23 ++- .../util/PhoenixConfigurationUtil.java | 11 3 files changed, 96 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/phoenix/blob/267323da/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java -- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java index df96c7b..14f7b94 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/mapreduce/PhoenixInputFormat.java @@ -21,14 +21,18 @@ import java.io.IOException; import java.sql.Connection; import java.sql.SQLException; import java.sql.Statement; +import java.util.Collections; import java.util.List; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import 
org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.*; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.RegionSizeCalculator; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; @@ -42,6 +46,7 @@ import org.apache.phoenix.iterate.MapReduceParallelScanGrouper; import org.apache.phoenix.jdbc.PhoenixStatement; import org.apache.phoenix.mapreduce.util.ConnectionUtil; import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil; +import org.apache.phoenix.mapreduce.util.PhoenixMapReduceUtil; import org.apache.phoenix.query.KeyRange; import org.apache.phoenix.util.PhoenixRuntime; @@ -80,16 +85,72 @@ public class PhoenixInputFormat extends InputFormat allSplits = queryPlan.getSplits(); -final List splits = generateSplits(queryPlan,allSplits); +final List splits = generateSplits(queryPlan, allSplits, configuration); return splits; } -private List generateSplits(final QueryPlan qplan, final List splits) throws IOException { +private List generateSplits(final QueryPlan qplan, final List splits, Configuration config) throws IOException { Preconditions.checkNotNull(qplan); Preconditions.checkNotNull(splits); + +// Get the RegionSizeCalculator +org.apache.hadoop.hbase.client.Connection connection = ConnectionFactory.createConnection(config); +RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan +.getTableRef().getTable().getPhysicalName().toString())); +RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection +.getAdmin()); + + final List psplits = Lists.newArrayListWithExpectedSize(splits.size()); for (List scans : qplan.getScans()) { -psplits.add(new PhoenixInputSplit(scans)); +// Get the region location +HRegionLocation location = regionLocator.getRegionLocation( 
+scans.get(0).getStartRow(), +false +); + +String regionLocation = location.getHostname(); + +// Get the region size +long regionSize = sizeCalculator.getRegionSize( +location.getRegionInfo().getRegionName() +); + +// Generate splits based off statistics, or just region splits? +boolean splitByStats = PhoenixConfigurationUtil.getSplitByStats(config); + +