Repository: incubator-hawq Updated Branches: refs/heads/master 099557973 -> b7f9f36f5
HAWQ-1461. Improve partition parameters validation for PXF-JDBC plugin. Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/b7f9f36f Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/b7f9f36f Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/b7f9f36f Branch: refs/heads/master Commit: b7f9f36f5ff2f1473960803ab52bce2886d45abf Parents: 0995579 Author: Lav Jain <[email protected]> Authored: Fri Jun 9 15:34:35 2017 -0700 Committer: Oleksandr Diachenko <[email protected]> Committed: Fri Jun 9 15:34:35 2017 -0700 ---------------------------------------------------------------------- pxf/build.gradle | 2 +- .../org/apache/hawq/pxf/api/StatsAccessor.java | 2 + .../hawq/pxf/api/utilities/Utilities.java | 4 +- pxf/pxf-jdbc/README.md | 2 - .../plugins/jdbc/JdbcPartitionFragmenter.java | 40 +++-- .../jdbc/JdbcPartitionFragmenterTest.java | 173 ++++++++++++------- 6 files changed, 139 insertions(+), 84 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b7f9f36f/pxf/build.gradle ---------------------------------------------------------------------- diff --git a/pxf/build.gradle b/pxf/build.gradle index 3fd9992..80d30df 100644 --- a/pxf/build.gradle +++ b/pxf/build.gradle @@ -520,7 +520,7 @@ def tomcatTargetDir = "tomcat/build" task tomcatGet << { apply plugin: 'de.undercouch.download' - + def TarGzSuffix = ".tar.gz" def tomcatTar = "${tomcatName}${TarGzSuffix}" def tomcatUrl = "http://archive.apache.org/dist/tomcat/tomcat-7/v${tomcatVersion}/bin/${tomcatTar}" http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b7f9f36f/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/StatsAccessor.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/StatsAccessor.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/StatsAccessor.java index 7ecdf52..d256e77 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/StatsAccessor.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/StatsAccessor.java @@ -29,11 +29,13 @@ public interface StatsAccessor extends ReadAccessor { /** * Method which reads needed statistics for current split + * @throws Exception if retrieving the stats failed */ public void retrieveStats() throws Exception; /** * Returns next tuple based on statistics information without actual reading of data + * @return next row without reading it from disk */ public OneRow emitAggObject(); http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b7f9f36f/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/Utilities.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/Utilities.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/Utilities.java index 916c2b5..ed8ad28 100644 --- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/Utilities.java +++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/Utilities.java @@ -164,7 +164,7 @@ public class Utilities { * @param inputData input data which has protocol information * @return fragment metadata * @throws IllegalArgumentException if fragment metadata information wasn't found in input data - * @throws Exception + * @throws Exception if unable to parse the fragment */ public static FragmentMetadata parseFragmentMetadata(InputData inputData) throws Exception { byte[] serializedLocation = inputData.getFragmentMetadata(); @@ -198,7 +198,7 @@ public class Utilities { /** * Based on accessor information determines whether to use AggBridge * - * @param protData + * @param inputData input data * @return true if AggBridge is applicable for current context */ public static boolean useAggBridge(InputData inputData) { http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b7f9f36f/pxf/pxf-jdbc/README.md ---------------------------------------------------------------------- diff --git a/pxf/pxf-jdbc/README.md b/pxf/pxf-jdbc/README.md index e8c4bc0..c158f32 100644 --- a/pxf/pxf-jdbc/README.md +++ b/pxf/pxf-jdbc/README.md @@ -105,8 +105,6 @@ The `PARTITION_BY` parameter can be null, and there will be only one fragment. The `RANGE` parameter indicates the range of data to be queried , it can be split by colon(':'). The range is left-closed, ie: `>= start_value AND < end_value` . - If the `column_type` is `int`, the `end_value` can be empty. - If the `column_type` is `enum`,the parameter `RANGE` can be empty. The `INTERVAL` parameter can be split by colon(':'), indicate the interval value of one fragment. When `column_type` is `date`,this parameter must http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b7f9f36f/pxf/pxf-jdbc/src/main/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenter.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-jdbc/src/main/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenter.java b/pxf/pxf-jdbc/src/main/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenter.java index 8b5886d..c693fd4 100644 --- a/pxf/pxf-jdbc/src/main/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenter.java +++ b/pxf/pxf-jdbc/src/main/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenter.java @@ -19,19 +19,21 @@ package org.apache.hawq.pxf.plugins.jdbc; * under the License. */ +import org.apache.hawq.pxf.api.Fragment; import org.apache.hawq.pxf.api.Fragmenter; import org.apache.hawq.pxf.api.FragmentsStats; import org.apache.hawq.pxf.api.UserDataException; -import org.apache.hawq.pxf.plugins.jdbc.utils.DbProduct; -import org.apache.hawq.pxf.plugins.jdbc.utils.ByteUtil; -import org.apache.hawq.pxf.api.Fragment; import org.apache.hawq.pxf.api.utilities.InputData; +import org.apache.hawq.pxf.plugins.jdbc.utils.ByteUtil; +import org.apache.hawq.pxf.plugins.jdbc.utils.DbProduct; import java.net.InetAddress; import java.net.UnknownHostException; import java.text.ParseException; import java.text.SimpleDateFormat; -import java.util.*; +import java.util.Calendar; +import java.util.Date; +import java.util.List; /** * Fragmenter class for JDBC data resources. @@ -111,30 +113,42 @@ public class JdbcPartitionFragmenter extends Fragmenter { partitionBy = inConf.getUserProperty("PARTITION_BY").split(":"); partitionColumn = partitionBy[0]; partitionType = PartitionType.getType(partitionBy[1]); - } catch (IllegalArgumentException e1) { + } catch (IllegalArgumentException | ArrayIndexOutOfBoundsException e1) { throw new UserDataException("The parameter 'PARTITION_BY' invalid, the pattern is 'column_name:date|int|enum'"); } + //parse and validate parameter-RANGE try { - range = inConf.getUserProperty("RANGE").split(":"); + String rangeStr = inConf.getUserProperty("RANGE"); + if (rangeStr != null) { + range = rangeStr.split(":"); + if (range.length == 1 && partitionType != PartitionType.ENUM) + throw new UserDataException("The parameter 'RANGE' does not specify '[:end_value]'"); + } else + throw new UserDataException("The parameter 'RANGE' must be specified along with 'PARTITION_BY'"); } catch (IllegalArgumentException e1) { throw new UserDataException("The parameter 'RANGE' invalid, the pattern is 'start_value[:end_value]'"); } + + //parse and validate parameter-INTERVAL try { - //parse and validate parameter-INTERVAL - if (inConf.getUserProperty("INTERVAL") != null) { - interval = inConf.getUserProperty("INTERVAL").split(":"); + String intervalStr = inConf.getUserProperty("INTERVAL"); + if (intervalStr != null) { + interval = intervalStr.split(":"); intervalNum = Integer.parseInt(interval[0]); if (interval.length > 1) intervalType = IntervalType.type(interval[1]); - } + if (interval.length == 1 && partitionType == PartitionType.DATE) + throw new UserDataException("The parameter 'INTERVAL' does not specify unit [:year|month|day]"); + } else if (partitionType != PartitionType.ENUM) + throw new UserDataException("The parameter 'INTERVAL' must be specified along with 'PARTITION_BY'"); if (intervalNum < 1) throw new UserDataException("The parameter 'INTERVAL' must > 1, but actual is '" + intervalNum + "'"); } catch (IllegalArgumentException e1) { throw new UserDataException("The parameter 'INTERVAL' invalid, the pattern is 'interval_num[:interval_unit]'"); - } catch (UserDataException e2) { - throw e2; } + + //parse any date values try { if (partitionType == PartitionType.DATE) { SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd"); @@ -144,7 +158,7 @@ public class JdbcPartitionFragmenter extends Fragmenter { rangeEnd.setTime(df.parse(range[1])); } } catch (ParseException e) { - throw new UserDataException("The parameter 'RANGE' include invalid date format."); + throw new UserDataException("The parameter 'RANGE' has invalid date format. Expected format is 'YYYY-MM-DD'"); } } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/b7f9f36f/pxf/pxf-jdbc/src/test/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenterTest.java ---------------------------------------------------------------------- diff --git a/pxf/pxf-jdbc/src/test/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenterTest.java b/pxf/pxf-jdbc/src/test/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenterTest.java index b7a7493..6785af6 100644 --- a/pxf/pxf-jdbc/src/test/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenterTest.java +++ b/pxf/pxf-jdbc/src/test/java/org/apache/hawq/pxf/plugins/jdbc/JdbcPartitionFragmenterTest.java @@ -23,9 +23,9 @@ import org.apache.hawq.pxf.api.Fragment; import org.apache.hawq.pxf.api.UserDataException; import org.apache.hawq.pxf.api.utilities.InputData; import org.apache.hawq.pxf.plugins.jdbc.utils.ByteUtil; +import org.junit.Before; import org.junit.Test; -import java.text.ParseException; import java.util.Calendar; import java.util.List; @@ -37,17 +37,22 @@ import static org.mockito.Mockito.when; public class JdbcPartitionFragmenterTest { InputData inputData; - @Test - public void testPartionByDateOfMonth() throws Exception { + @Before + public void setUp() throws Exception { prepareConstruction(); when(inputData.getDataSource()).thenReturn("sales"); + } + + @Test + public void testPartionByDateOfMonth() throws Exception { + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("cdate:date"); when(inputData.getUserProperty("RANGE")).thenReturn("2008-01-01:2009-01-01"); when(inputData.getUserProperty("INTERVAL")).thenReturn("1:month"); JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); List<Fragment> fragments = fragment.getFragments(); - assertEquals(fragments.size(), 12); + assertEquals(12, fragments.size()); //fragment - 1 byte[] fragMeta = fragments.get(0).getMetadata(); @@ -68,67 +73,64 @@ public class JdbcPartitionFragmenterTest { //when end_date > start_date when(inputData.getUserProperty("RANGE")).thenReturn("2008-01-01:2001-01-01"); fragment = new JdbcPartitionFragmenter(inputData); - assertEquals(0, fragment.getFragments().size()); + fragments = fragment.getFragments(); + assertEquals(0, fragments.size()); } @Test public void testPartionByDateOfYear() throws Exception { - prepareConstruction(); - when(inputData.getDataSource()).thenReturn("sales"); + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("cdate:date"); when(inputData.getUserProperty("RANGE")).thenReturn("2008-01-01:2011-01-01"); when(inputData.getUserProperty("INTERVAL")).thenReturn("1:year"); JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); List<Fragment> fragments = fragment.getFragments(); - assertEquals(fragments.size(), 3); + assertEquals(3, fragments.size()); } @Test public void testPartionByInt() throws Exception { - prepareConstruction(); - when(inputData.getDataSource()).thenReturn("sales"); + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("year:int"); when(inputData.getUserProperty("RANGE")).thenReturn("2001:2012"); when(inputData.getUserProperty("INTERVAL")).thenReturn("2"); JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); List<Fragment> fragments = fragment.getFragments(); - assertEquals(fragments.size(), 6); + assertEquals(6, fragments.size()); //fragment - 1 byte[] fragMeta = fragments.get(0).getMetadata(); byte[][] newBytes = ByteUtil.splitBytes(fragMeta, 4); int fragStart = ByteUtil.toInt(newBytes[0]); int fragEnd = ByteUtil.toInt(newBytes[1]); - assertEquals(fragStart, 2001); - assertEquals(fragEnd, 2003); + assertEquals(2001, fragStart); + assertEquals(2003, fragEnd); //fragment - 6 fragMeta = fragments.get(5).getMetadata(); newBytes = ByteUtil.splitBytes(fragMeta, 4); fragStart = ByteUtil.toInt(newBytes[0]); fragEnd = ByteUtil.toInt(newBytes[1]); - assertEquals(fragStart, 2011); - assertEquals(fragEnd, 2012); + assertEquals(2011, fragStart); + assertEquals(2012, fragEnd); //when end > start when(inputData.getUserProperty("RANGE")).thenReturn("2013:2012"); fragment = new JdbcPartitionFragmenter(inputData); assertEquals(0, fragment.getFragments().size()); - } @Test public void testPartionByEnum() throws Exception { - prepareConstruction(); - when(inputData.getDataSource()).thenReturn("sales"); + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("level:enum"); when(inputData.getUserProperty("RANGE")).thenReturn("excellent:good:general:bad"); JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); List<Fragment> fragments = fragment.getFragments(); - assertEquals(fragments.size(), 4); + assertEquals(4, fragments.size()); //fragment - 1 byte[] fragMeta = fragments.get(0).getMetadata(); @@ -139,85 +141,125 @@ public class JdbcPartitionFragmenterTest { assertEquals("bad", new String(fragMeta)); } - @Test - public void inValidPartitiontype() throws Exception { - prepareConstruction(); - when(inputData.getDataSource()).thenReturn("sales"); + @Test(expected = UserDataException.class) + public void testInValidPartitiontype() throws Exception { + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("level:float"); when(inputData.getUserProperty("RANGE")).thenReturn("100:200"); - try { - new JdbcPartitionFragmenter(inputData); - fail("Expected an IllegalArgumentException"); - } catch (UserDataException ex) { - - } + new JdbcPartitionFragmenter(inputData); } - @Test - public void inValidParameterFormat() throws Exception { - prepareConstruction(); - when(inputData.getDataSource()).thenReturn("sales"); + @Test(expected = UserDataException.class) + public void testInValidParameterFormat() throws Exception { //PARTITION_BY must be comma-delimited string when(inputData.getUserProperty("PARTITION_BY")).thenReturn("level-enum"); when(inputData.getUserProperty("RANGE")).thenReturn("100:200"); - try { - new JdbcPartitionFragmenter(inputData); - fail("Expected an ArrayIndexOutOfBoundsException"); - } catch (ArrayIndexOutOfBoundsException ex) { - } + + new JdbcPartitionFragmenter(inputData); + } + + @Test(expected = UserDataException.class) + public void testInValidDateFormat() throws Exception { //date string must be yyyy-MM-dd when(inputData.getUserProperty("PARTITION_BY")).thenReturn("cdate:date"); when(inputData.getUserProperty("RANGE")).thenReturn("2008/01/01:2009-01-01"); when(inputData.getUserProperty("INTERVAL")).thenReturn("1:month"); - try { - JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); - fragment.getFragments(); - fail("Expected an ParseException"); - } catch (UserDataException ex) { - } + + new JdbcPartitionFragmenter(inputData).getFragments(); } - @Test - public void inValidParameterValue() throws Exception { - prepareConstruction(); + @Test(expected = UserDataException.class) + public void testInValidParameterValue() throws Exception { + //INTERVAL must be greater than 0 when(inputData.getUserProperty("PARTITION_BY")).thenReturn("cdate:date"); when(inputData.getUserProperty("RANGE")).thenReturn("2008-01-01:2009-01-01"); when(inputData.getUserProperty("INTERVAL")).thenReturn("-1:month"); - try { - new JdbcPartitionFragmenter(inputData); - fail("Expected an UserDataException"); - } catch (UserDataException ex) { - } + + new JdbcPartitionFragmenter(inputData); } - @Test - public void inValidIntervaltype() throws Exception { - prepareConstruction(); - when(inputData.getDataSource()).thenReturn("sales"); + @Test(expected = UserDataException.class) + public void testInValidIntervaltype() throws Exception { + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("cdate:date"); when(inputData.getUserProperty("RANGE")).thenReturn("2008-01-01:2011-01-01"); when(inputData.getUserProperty("INTERVAL")).thenReturn("6:hour"); - try { - JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); - fragment.getFragments(); - fail("Expected an UserDataException"); - } catch (UserDataException ex) { - } + new JdbcPartitionFragmenter(inputData).getFragments(); + } + + @Test(expected = UserDataException.class) + public void testIntervaltypeMissing() throws Exception { + + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("cdate:date"); + when(inputData.getUserProperty("RANGE")).thenReturn("2008-01-01:2011-01-01"); + when(inputData.getUserProperty("INTERVAL")).thenReturn("6"); + + new JdbcPartitionFragmenter(inputData).getFragments(); + } + + @Test + public void testIntervaltypeMissingValid() throws Exception { + + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("year:int"); + when(inputData.getUserProperty("RANGE")).thenReturn("2001:2012"); + when(inputData.getUserProperty("INTERVAL")).thenReturn("1"); + + JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); + List<Fragment> fragments = fragment.getFragments(); + assertEquals(11, fragments.size()); + } + + @Test + public void testIntervalMissingEnum() throws Exception { + + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("level:enum"); + when(inputData.getUserProperty("RANGE")).thenReturn("100:200:300"); + + JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); + List<Fragment> fragments = fragment.getFragments(); + assertEquals(3, fragments.size()); + } + + @Test(expected = UserDataException.class) + public void testRangeMissingEndValue() throws Exception { + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("cdate:date"); + when(inputData.getUserProperty("RANGE")).thenReturn("2008-01-01"); + when(inputData.getUserProperty("INTERVAL")).thenReturn("1:year"); + + new JdbcPartitionFragmenter(inputData).getFragments(); + } + + @Test(expected = UserDataException.class) + public void testRangeMissing() throws Exception { + + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("year:int"); + when(inputData.getUserProperty("INTERVAL")).thenReturn("1"); + + new JdbcPartitionFragmenter(inputData).getFragments(); + } + + @Test + public void testRangeSingleValueEnum() throws Exception { + + when(inputData.getUserProperty("PARTITION_BY")).thenReturn("level:enum"); + when(inputData.getUserProperty("RANGE")).thenReturn("100"); + + JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); + List<Fragment> fragments = fragment.getFragments(); + assertEquals(1, fragments.size()); } @Test public void testNoPartition() throws Exception { - prepareConstruction(); - when(inputData.getDataSource()).thenReturn("sales"); JdbcPartitionFragmenter fragment = new JdbcPartitionFragmenter(inputData); List<Fragment> fragments = fragment.getFragments(); - assertEquals(fragments.size(), 1); + assertEquals(1, fragments.size()); } private void assertDateEquals(long date, int year, int month, int day) { @@ -230,6 +272,5 @@ public class JdbcPartitionFragmenterTest { private void prepareConstruction() throws Exception { inputData = mock(InputData.class); - } }
