[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational

2014-08-22 Thread James Taylor (JIRA)

[ 
https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14107118#comment-14107118
 ] 

James Taylor commented on PHOENIX-1074:
---

[~jaywong] - we found and fixed several issues with the intersect method that 
may have been causing you problems. If you have a chance, would you mind trying 
it with our latest 3.1 RC2 and/or 4.1 RC0?

Also, FWIW, we're improving on our parallelization algorithm in 4.2: see 
PHOENIX-180.

 ParallelIteratorRegionSplitterFactory get Splits is not rational
 

 Key: PHOENIX-1074
 URL: https://issues.apache.org/jira/browse/PHOENIX-1074
 Project: Phoenix
  Issue Type: Bug
Reporter: jay wong
 Attachments: SkipScanFilterSaltedIntersectTest.java


 create a table 
 {code}
 create table if not exists table1(
   gmt VARCHAR NOT NULL, 
   spm_type VARCHAR NOT NULL, 
   spm VARCHAR NOT NULL, 
   A.int_a INTEGER, 
   B.int_b INTEGER, 
   B.int_c INTEGER 
   CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, 
 bloomfilter='ROW';
 {code}
 and made the table 29 partitions as this.
 |startrow|endrow|
 | |\x0020140201|
 |\x0020140201|\x0020140202|
 |\x0020140202|\x0020140203|
 |\x0020140203|\x0020140204|
 |\x0020140204|\x0020140205|   
 |\x0020140205|\x0020140206|   
 |\x0020140206|\x0020140207|
 |\x0020140207|\x0120140201|
 |\x0120140201|\x0120140202|
 |\x0120140202|\x0120140203|
 |\x0120140203|\x0120140204|
 |\x0120140204|\x0120140205|
 |\x0120140205|\x0120140206|
 |\x0120140206|\x0120140207|
 |\x0120140207|\x0220140201|
 |\x0220140201|\x0220140202|
 |\x0220140202|\x0220140203|
 |\x0220140203|\x0220140204|
 |\x0220140204|\x0220140205|
 |\x0220140205|\x0220140206|
 |\x0220140206|\x0220140207|
 |\x0220140207|\x0320140201|
 |\x0320140201|\x0320140202|
 |\x0320140202|\x0320140203|
 |\x0320140203|\x0320140204|
 |\x0320140204|\x0320140205|
 |\x0320140205|\x0320140206|
 |\x0320140206|\x0320140207|
 |\x0320140207| |  
 Then insert some data;
 |GMT |  SPM_TYPE  |SPM |   INT_A|   INT_B|   INT_C |
 | 20140201   | 1  | 1.2.3.4546 | 218| 218| null |
 | 20140201   | 1  | 1.2.44545  | 190| 190| null |
 | 20140201   | 1  | 1.353451312 | 246| 246| null |
 | 20140201   | 2  | 1.2.3.6775 | 183| 183| null |
 |...|...|...|...|...|...|
 | 20140207   | 3  | 1.2.3.4546 | 224| 224| null |
 | 20140207   | 3  | 1.2.44545  | 196| 196| null |
 | 20140207   | 3  | 1.353451312 | 168| 168| null |
 | 20140207   | 4  | 1.2.3.6775 | 189| 189| null |
 | 20140207   | 4  | 1.23.345345 | 217| 217| null |
 | 20140207   | 4  | 1.23234234234 | 245| 245| null |
 print a log like this
 {code}
 public class ParallelIterators extends ExplainTable implements 
 ResultIterators {
 
  @Override
 public List<PeekingResultIterator> getIterators() throws SQLException {
 boolean success = false;
 final ConnectionQueryServices services = 
 context.getConnection().getQueryServices();
 ReadOnlyProps props = services.getProps();
 int numSplits = splits.size();
 List<PeekingResultIterator> iterators = new 
 ArrayList<PeekingResultIterator>(numSplits);
 List<Pair<byte[],Future<PeekingResultIterator>>> futures = new 
 ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits);
 final UUID scanId = UUID.randomUUID();
 try {
 ExecutorService executor = services.getExecutor();
 System.out.println("the split size is " + numSplits);
  
  }
 }
 {code}
 then execute some sql 
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2' and spm like '1.%'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207'
 the split size is 27
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2' and spm like '1.%'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204'
 the split size is 12
 {code}
 but I think the following two queries
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2' and spm like '1.%'
 {code}
 and 
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' 
 {code}
 should have the same splits, so why do they not?



--
This message was sent by Atlassian JIRA

[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational

2014-07-21 Thread jay wong (JIRA)

[ 
https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14068585#comment-14068585
 ] 

jay wong commented on PHOENIX-1074:
---

[~jamestaylor]

I have added a test case. The second test case demonstrates the problem.

I know the rule is hard to correct, but the current behavior is not advisable.

 ParallelIteratorRegionSplitterFactory get Splits is not rational
 

 Key: PHOENIX-1074
 URL: https://issues.apache.org/jira/browse/PHOENIX-1074
 Project: Phoenix
  Issue Type: Bug
Reporter: jay wong
 Attachments: SkipScanFilterSaltedIntersectTest.java


 create a table 
 {code}
 create table if not exists table1(
   gmt VARCHAR NOT NULL, 
   spm_type VARCHAR NOT NULL, 
   spm VARCHAR NOT NULL, 
   A.int_a INTEGER, 
   B.int_b INTEGER, 
   B.int_c INTEGER 
   CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, 
 bloomfilter='ROW';
 {code}
 and made the table 29 partitions as this.
 |startrow|endrow|
 | |\x0020140201|
 |\x0020140201|\x0020140202|
 |\x0020140202|\x0020140203|
 |\x0020140203|\x0020140204|
 |\x0020140204|\x0020140205|   
 |\x0020140205|\x0020140206|   
 |\x0020140206|\x0020140207|
 |\x0020140207|\x0120140201|
 |\x0120140201|\x0120140202|
 |\x0120140202|\x0120140203|
 |\x0120140203|\x0120140204|
 |\x0120140204|\x0120140205|
 |\x0120140205|\x0120140206|
 |\x0120140206|\x0120140207|
 |\x0120140207|\x0220140201|
 |\x0220140201|\x0220140202|
 |\x0220140202|\x0220140203|
 |\x0220140203|\x0220140204|
 |\x0220140204|\x0220140205|
 |\x0220140205|\x0220140206|
 |\x0220140206|\x0220140207|
 |\x0220140207|\x0320140201|
 |\x0320140201|\x0320140202|
 |\x0320140202|\x0320140203|
 |\x0320140203|\x0320140204|
 |\x0320140204|\x0320140205|
 |\x0320140205|\x0320140206|
 |\x0320140206|\x0320140207|
 |\x0320140207| |  
 Then insert some data;
 |GMT |  SPM_TYPE  |SPM |   INT_A|   INT_B|   INT_C |
 | 20140201   | 1  | 1.2.3.4546 | 218| 218| null |
 | 20140201   | 1  | 1.2.44545  | 190| 190| null |
 | 20140201   | 1  | 1.353451312 | 246| 246| null |
 | 20140201   | 2  | 1.2.3.6775 | 183| 183| null |
 |...|...|...|...|...|...|
 | 20140207   | 3  | 1.2.3.4546 | 224| 224| null |
 | 20140207   | 3  | 1.2.44545  | 196| 196| null |
 | 20140207   | 3  | 1.353451312 | 168| 168| null |
 | 20140207   | 4  | 1.2.3.6775 | 189| 189| null |
 | 20140207   | 4  | 1.23.345345 | 217| 217| null |
 | 20140207   | 4  | 1.23234234234 | 245| 245| null |
 print a log like this
 {code}
 public class ParallelIterators extends ExplainTable implements 
 ResultIterators {
 
  @Override
 public List<PeekingResultIterator> getIterators() throws SQLException {
 boolean success = false;
 final ConnectionQueryServices services = 
 context.getConnection().getQueryServices();
 ReadOnlyProps props = services.getProps();
 int numSplits = splits.size();
 List<PeekingResultIterator> iterators = new 
 ArrayList<PeekingResultIterator>(numSplits);
 List<Pair<byte[],Future<PeekingResultIterator>>> futures = new 
 ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits);
 final UUID scanId = UUID.randomUUID();
 try {
 ExecutorService executor = services.getExecutor();
 System.out.println("the split size is " + numSplits);
  
  }
 }
 {code}
 then execute some sql 
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2' and spm like '1.%'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207'
 the split size is 27
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2' and spm like '1.%'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204'
 the split size is 12
 {code}
 but I think the following two queries
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2' and spm like '1.%'
 {code}
 and 
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' 
 {code}
 should have the same splits, so why do they not?



--
This message was sent by Atlassian JIRA
(v6.2#6252)


[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational

2014-07-10 Thread jay wong (JIRA)

[ 
https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14057369#comment-14057369
 ] 

jay wong commented on PHOENIX-1074:
---

[~jamestaylor]
Please check my problem again.
This is my primary key and SALT_BUCKETS setting:
{code}
CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4
{code}

{code}
select * from table1 where gmt > '20140202' and gmt < '20140204'

the split size is 12  (is logical)
{code}

{code}
select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = 
'2'

the split size is 28 (I think a split size of 12 would also be the logical result here)
{code}

This is only a small-scale example.

My online table has 1900 regions.

With a logical split policy, the query would need only about 20 splits.

But it actually gets 1900 splits.



 ParallelIteratorRegionSplitterFactory get Splits is not rational
 

 Key: PHOENIX-1074
 URL: https://issues.apache.org/jira/browse/PHOENIX-1074
 Project: Phoenix
  Issue Type: Bug
Reporter: jay wong

 create a table 
 {code}
 create table if not exists table1(
   gmt VARCHAR NOT NULL, 
   spm_type VARCHAR NOT NULL, 
   spm VARCHAR NOT NULL, 
   A.int_a INTEGER, 
   B.int_b INTEGER, 
   B.int_c INTEGER 
   CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, 
 bloomfilter='ROW';
 {code}
 and made the table 29 partitions as this.
 |startrow|endrow|
 | |\x0020140201|
 |\x0020140201|\x0020140202|
 |\x0020140202|\x0020140203|
 |\x0020140203|\x0020140204|
 |\x0020140204|\x0020140205|   
 |\x0020140205|\x0020140206|   
 |\x0020140206|\x0020140207|
 |\x0020140207|\x0120140201|
 |\x0120140201|\x0120140202|
 |\x0120140202|\x0120140203|
 |\x0120140203|\x0120140204|
 |\x0120140204|\x0120140205|
 |\x0120140205|\x0120140206|
 |\x0120140206|\x0120140207|
 |\x0120140207|\x0220140201|
 |\x0220140201|\x0220140202|
 |\x0220140202|\x0220140203|
 |\x0220140203|\x0220140204|
 |\x0220140204|\x0220140205|
 |\x0220140205|\x0220140206|
 |\x0220140206|\x0220140207|
 |\x0220140207|\x0320140201|
 |\x0320140201|\x0320140202|
 |\x0320140202|\x0320140203|
 |\x0320140203|\x0320140204|
 |\x0320140204|\x0320140205|
 |\x0320140205|\x0320140206|
 |\x0320140206|\x0320140207|
 |\x0320140207| |  
 Then insert some data;
 |GMT |  SPM_TYPE  |SPM |   INT_A|   INT_B|   INT_C |
 | 20140201   | 1  | 1.2.3.4546 | 218| 218| null |
 | 20140201   | 1  | 1.2.44545  | 190| 190| null |
 | 20140201   | 1  | 1.353451312 | 246| 246| null |
 | 20140201   | 2  | 1.2.3.6775 | 183| 183| null |
 |...|...|...|...|...|...|
 | 20140207   | 3  | 1.2.3.4546 | 224| 224| null |
 | 20140207   | 3  | 1.2.44545  | 196| 196| null |
 | 20140207   | 3  | 1.353451312 | 168| 168| null |
 | 20140207   | 4  | 1.2.3.6775 | 189| 189| null |
 | 20140207   | 4  | 1.23.345345 | 217| 217| null |
 | 20140207   | 4  | 1.23234234234 | 245| 245| null |
 print a log like this
 {code}
 public class ParallelIterators extends ExplainTable implements 
 ResultIterators {
 
  @Override
 public List<PeekingResultIterator> getIterators() throws SQLException {
 boolean success = false;
 final ConnectionQueryServices services = 
 context.getConnection().getQueryServices();
 ReadOnlyProps props = services.getProps();
 int numSplits = splits.size();
 List<PeekingResultIterator> iterators = new 
 ArrayList<PeekingResultIterator>(numSplits);
 List<Pair<byte[],Future<PeekingResultIterator>>> futures = new 
 ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits);
 final UUID scanId = UUID.randomUUID();
 try {
 ExecutorService executor = services.getExecutor();
 System.out.println("the split size is " + numSplits);
  
  }
 }
 {code}
 then execute some sql 
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2' and spm like '1.%'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207'
 the split size is 27
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2' and spm like '1.%'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204'
 the split size is 12
 {code}
 but I think 
 {code}
 select * from table1 where gmt  '20140202' and gmt  

[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational

2014-07-10 Thread James Taylor (JIRA)

[ 
https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14057436#comment-14057436
 ] 

James Taylor commented on PHOENIX-1074:
---

The second query is using a skip scan because there's range information for all 
columns in your PK:
{code}
select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = 
'2' and spm like '1.%'
{code}
So it'll run the skip scan over all regions since the table is salted. How is 
performance for this query? 

You can force it to do a range scan with a hint like this:
{code}
select /*+ RANGE_SCAN */ * from table1 where gmt > '20140202' and gmt < 
'20140207' and spm_type = '2' and spm like '1.%'
{code}

Please let us know how performance compares between the two.


 ParallelIteratorRegionSplitterFactory get Splits is not rational
 

 Key: PHOENIX-1074
 URL: https://issues.apache.org/jira/browse/PHOENIX-1074
 Project: Phoenix
  Issue Type: Bug
Reporter: jay wong

 create a table 
 {code}
 create table if not exists table1(
   gmt VARCHAR NOT NULL, 
   spm_type VARCHAR NOT NULL, 
   spm VARCHAR NOT NULL, 
   A.int_a INTEGER, 
   B.int_b INTEGER, 
   B.int_c INTEGER 
   CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, 
 bloomfilter='ROW';
 {code}
 and made the table 29 partitions as this.
 |startrow|endrow|
 | |\x0020140201|
 |\x0020140201|\x0020140202|
 |\x0020140202|\x0020140203|
 |\x0020140203|\x0020140204|
 |\x0020140204|\x0020140205|   
 |\x0020140205|\x0020140206|   
 |\x0020140206|\x0020140207|
 |\x0020140207|\x0120140201|
 |\x0120140201|\x0120140202|
 |\x0120140202|\x0120140203|
 |\x0120140203|\x0120140204|
 |\x0120140204|\x0120140205|
 |\x0120140205|\x0120140206|
 |\x0120140206|\x0120140207|
 |\x0120140207|\x0220140201|
 |\x0220140201|\x0220140202|
 |\x0220140202|\x0220140203|
 |\x0220140203|\x0220140204|
 |\x0220140204|\x0220140205|
 |\x0220140205|\x0220140206|
 |\x0220140206|\x0220140207|
 |\x0220140207|\x0320140201|
 |\x0320140201|\x0320140202|
 |\x0320140202|\x0320140203|
 |\x0320140203|\x0320140204|
 |\x0320140204|\x0320140205|
 |\x0320140205|\x0320140206|
 |\x0320140206|\x0320140207|
 |\x0320140207| |  
 Then insert some data;
 |GMT |  SPM_TYPE  |SPM |   INT_A|   INT_B|   INT_C |
 | 20140201   | 1  | 1.2.3.4546 | 218| 218| null |
 | 20140201   | 1  | 1.2.44545  | 190| 190| null |
 | 20140201   | 1  | 1.353451312 | 246| 246| null |
 | 20140201   | 2  | 1.2.3.6775 | 183| 183| null |
 |...|...|...|...|...|...|
 | 20140207   | 3  | 1.2.3.4546 | 224| 224| null |
 | 20140207   | 3  | 1.2.44545  | 196| 196| null |
 | 20140207   | 3  | 1.353451312 | 168| 168| null |
 | 20140207   | 4  | 1.2.3.6775 | 189| 189| null |
 | 20140207   | 4  | 1.23.345345 | 217| 217| null |
 | 20140207   | 4  | 1.23234234234 | 245| 245| null |
 print a log like this
 {code}
 public class ParallelIterators extends ExplainTable implements 
 ResultIterators {
 
  @Override
 public List<PeekingResultIterator> getIterators() throws SQLException {
 boolean success = false;
 final ConnectionQueryServices services = 
 context.getConnection().getQueryServices();
 ReadOnlyProps props = services.getProps();
 int numSplits = splits.size();
 List<PeekingResultIterator> iterators = new 
 ArrayList<PeekingResultIterator>(numSplits);
 List<Pair<byte[],Future<PeekingResultIterator>>> futures = new 
 ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits);
 final UUID scanId = UUID.randomUUID();
 try {
 ExecutorService executor = services.getExecutor();
 System.out.println("the split size is " + numSplits);
  
  }
 }
 {code}
 then execute some sql 
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2' and spm like '1.%'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
 = '2'
 the split size is 31
 select * from table1 where gmt > '20140202' and gmt < '20140207'
 the split size is 27
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2' and spm like '1.%'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
 = '2'
 the split size is 28
 select * from table1 where gmt > '20140202' and gmt < '20140204'
 the split size is 12
 {code}
 but I think 
 {code}
 select * from table1 where gmt > '20140202' and gmt < '20140207' and