[jira] [Updated] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational

jay wong (JIRA) Wed, 09 Jul 2014 19:04:14 -0700

     [ 
https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]


jay wong updated PHOENIX-1074:
------------------------------

    Description: 
Create a table:
{code}
create table if not exists table1(
  gmt VARCHAR NOT NULL, 
  spm_type VARCHAR NOT NULL, 
  spm VARCHAR NOT NULL, 
  A.int_a INTEGER, 
  B.int_b INTEGER, 
  B.int_c INTEGER 
  CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, 
bloomfilter='ROW';
{code}
and split the table into 29 partitions, as follows:
|startrow|endrow|
| |\x0020140201|
|\x0020140201|\x0020140202|
|\x0020140202|\x0020140203|
|\x0020140203|\x0020140204|
|\x0020140204|\x0020140205|     
|\x0020140205|\x0020140206|     
|\x0020140206|\x0020140207|
|\x0020140207|\x0120140201|
|\x0120140201|\x0120140202|
|\x0120140202|\x0120140203|
|\x0120140203|\x0120140204|
|\x0120140204|\x0120140205|
|\x0120140205|\x0120140206|
|\x0120140206|\x0120140207|
|\x0120140207|\x0220140201|
|\x0220140201|\x0220140202|
|\x0220140202|\x0220140203|
|\x0220140203|\x0220140204|
|\x0220140204|\x0220140205|
|\x0220140205|\x0220140206|
|\x0220140206|\x0220140207|
|\x0220140207|\x0320140201|
|\x0320140201|\x0320140202|
|\x0320140202|\x0320140203|
|\x0320140203|\x0320140204|
|\x0320140204|\x0320140205|
|\x0320140205|\x0320140206|
|\x0320140206|\x0320140207|
|\x0320140207| |                

Then insert some data:
|    GMT     |  SPM_TYPE  |    SPM     |   INT_A    |   INT_B    |   INT_C    |
| 20140201   | 1          | 1.2.3.4546 | 218        | 218        | null       |
| 20140201   | 1          | 1.2.44545  | 190        | 190        | null       |
| 20140201   | 1          | 1.353451312 | 246        | 246        | null       |
| 20140201   | 2          | 1.2.3.6775 | 183        | 183        | null       |
|...|...|...|...|...|...|
| 20140207   | 3          | 1.2.3.4546 | 224        | 224        | null       |
| 20140207   | 3          | 1.2.44545  | 196        | 196        | null       |
| 20140207   | 3          | 1.353451312 | 168        | 168        | null       |
| 20140207   | 4          | 1.2.3.6775 | 189        | 189        | null       |
| 20140207   | 4          | 1.23.345345 | 217        | 217        | null       |
| 20140207   | 4          | 1.23234234234 | 245        | 245        | null       |

Add a print statement that logs the number of splits, like this:
{code}
public class ParallelIterators extends ExplainTable implements ResultIterators {
....
 @Override
    public List<PeekingResultIterator> getIterators() throws SQLException {
        boolean success = false;
        final ConnectionQueryServices services = 
context.getConnection().getQueryServices();
        ReadOnlyProps props = services.getProps();
        int numSplits = splits.size();
        List<PeekingResultIterator> iterators = new 
ArrayList<PeekingResultIterator>(numSplits);
        List<Pair<byte[],Future<PeekingResultIterator>>> futures = new 
ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits);
        final UUID scanId = UUID.randomUUID();
        try {
            ExecutorService executor = services.getExecutor();
            System.out.println("the split size is " + numSplits);
             ....
     }
}
{code}

Then execute some SQL queries (each query is followed by the split size that was logged for it):
{code}
select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = 
'2' and spm like '1.%'
the split size is 31
select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = 
'2'
the split size is 31
select * from table1 where gmt > '20140202' and gmt < '20140207'
the split size is 27
select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = 
'2' and spm like '1.%'
the split size is 28
select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = 
'2'
the split size is 28
select * from table1 where gmt > '20140202' and gmt < '20140204'
the split size is 12
{code}

but I think 
{code}
select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = 
'2' and spm like '1.%'
{code}
and 
{code}
select * from table1 where gmt > '20140202' and gmt < '20140207' 
{code}
these two SQL queries should have the same splits, but they do not (31 splits vs. 27 splits). Why?




  was:
create a table 
{code}
create table if not exists table1(
  gmt VARCHAR NOT NULL, 
  spm_type VARCHAR NOT NULL, 
  spm VARCHAR NOT NULL, 
  A.int_a INTEGER, 
  B.int_b INTEGER, 
  B.int_c INTEGER 
  CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, 
bloomfilter='ROW';
{code}
and made the table partition as this.
|startrow|endrow|
| |\x0020140201|
|\x0020140201|\x0020140202|
|\x0020140202|\x0020140203|
|\x0020140203|\x0020140204|
|\x0020140204|\x0020140205|     
|\x0020140205|\x0020140206|     
|\x0020140206|\x0020140207|
|\x0020140207|\x0120140201|
|\x0120140201|\x0120140202|
|\x0120140202|\x0120140203|
|\x0120140203|\x0120140204|
|\x0120140204|\x0120140205|
|\x0120140205|\x0120140206|
|\x0120140206|\x0120140207|
|\x0120140207|\x0220140201|
|\x0220140201|\x0220140202|
|\x0220140202|\x0220140203|
|\x0220140203|\x0220140204|
|\x0220140204|\x0220140205|
|\x0220140205|\x0220140206|
|\x0220140206|\x0220140207|
|\x0220140207|\x0320140201|
|\x0320140201|\x0320140202|
|\x0320140202|\x0320140203|
|\x0320140203|\x0320140204|
|\x0320140204|\x0320140205|
|\x0320140205|\x0320140206|
|\x0320140206|\x0320140207|
|\x0320140207| |                

Then insert some data;
|    GMT     |  SPM_TYPE  |    SPM     |   INT_A    |   INT_B    |   INT_C    |
| 20140201   | 1          | 1.2.3.4546 | 218        | 218        | null       |
| 20140201   | 1          | 1.2.44545  | 190        | 190        | null       |
| 20140201   | 1          | 1.353451312 | 246        | 246        | null       |
| 20140201   | 2          | 1.2.3.6775 | 183        | 183        | null       |
|...|...|...|...|...|...|
| 20140207   | 3          | 1.2.3.4546 | 224        | 224        | null       |
| 20140207   | 3          | 1.2.44545  | 196        | 196        | null       |
| 20140207   | 3          | 1.353451312 | 168        | 168        | null       |
| 20140207   | 4          | 1.2.3.6775 | 189        | 189        | null       |
| 20140207   | 4          | 1.23.345345 | 217        | 217        | null       |
| 20140207   | 4          | 1.23234234234 | 245        | 245        | null       |

print a log like this
{code}
public class ParallelIterators extends ExplainTable implements ResultIterators {
....
 @Override
    public List<PeekingResultIterator> getIterators() throws SQLException {
        boolean success = false;
        final ConnectionQueryServices services = 
context.getConnection().getQueryServices();
        ReadOnlyProps props = services.getProps();
        int numSplits = splits.size();
        List<PeekingResultIterator> iterators = new 
ArrayList<PeekingResultIterator>(numSplits);
        List<Pair<byte[],Future<PeekingResultIterator>>> futures = new 
ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits);
        final UUID scanId = UUID.randomUUID();
        try {
            ExecutorService executor = services.getExecutor();
            System.out.println("the split size is " + numSplits);
             ....
     }
}
{code}

then execute some sql 
{code}
select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = 
'2' and spm like '1.%'
the split size is 31
select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = 
'2'
the split size is 31
select * from table1 where gmt > '20140202' and gmt < '20140207'
the split size is 27
select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = 
'2' and spm like '1.%'
the split size is 28
select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = 
'2'
the split size is 28
select * from table1 where gmt > '20140202' and gmt < '20140204'
the split size is 12
{code}

but I think 
{code}
select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = 
'2' and spm like '1.%'
{code}
and 
{code}
select * from table1 where gmt > '20140202' and gmt < '20140207' 
{code}
the two sql will has the same split , but why not?





> ParallelIteratorRegionSplitterFactory get Splits is not rational
> ----------------------------------------------------------------
>
>                 Key: PHOENIX-1074
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-1074
>             Project: Phoenix
>          Issue Type: Wish
>            Reporter: jay wong
>
> create a table 
> {code}
> create table if not exists table1(
>   gmt VARCHAR NOT NULL, 
>   spm_type VARCHAR NOT NULL, 
>   spm VARCHAR NOT NULL, 
>   A.int_a INTEGER, 
>   B.int_b INTEGER, 
>   B.int_c INTEGER 
>   CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, 
> bloomfilter='ROW';
> {code}
> and split the table into 29 partitions, as follows:
> |startrow|endrow|
> | |\x0020140201|
> |\x0020140201|\x0020140202|
> |\x0020140202|\x0020140203|
> |\x0020140203|\x0020140204|
> |\x0020140204|\x0020140205|   
> |\x0020140205|\x0020140206|   
> |\x0020140206|\x0020140207|
> |\x0020140207|\x0120140201|
> |\x0120140201|\x0120140202|
> |\x0120140202|\x0120140203|
> |\x0120140203|\x0120140204|
> |\x0120140204|\x0120140205|
> |\x0120140205|\x0120140206|
> |\x0120140206|\x0120140207|
> |\x0120140207|\x0220140201|
> |\x0220140201|\x0220140202|
> |\x0220140202|\x0220140203|
> |\x0220140203|\x0220140204|
> |\x0220140204|\x0220140205|
> |\x0220140205|\x0220140206|
> |\x0220140206|\x0220140207|
> |\x0220140207|\x0320140201|
> |\x0320140201|\x0320140202|
> |\x0320140202|\x0320140203|
> |\x0320140203|\x0320140204|
> |\x0320140204|\x0320140205|
> |\x0320140205|\x0320140206|
> |\x0320140206|\x0320140207|
> |\x0320140207| |              
> Then insert some data;
> |    GMT     |  SPM_TYPE  |    SPM     |   INT_A    |   INT_B    |   INT_C    |
> | 20140201   | 1          | 1.2.3.4546 | 218        | 218        | null       |
> | 20140201   | 1          | 1.2.44545  | 190        | 190        | null       |
> | 20140201   | 1          | 1.353451312 | 246        | 246        | null       |
> | 20140201   | 2          | 1.2.3.6775 | 183        | 183        | null       |
> |...|...|...|...|...|...|
> | 20140207   | 3          | 1.2.3.4546 | 224        | 224        | null       |
> | 20140207   | 3          | 1.2.44545  | 196        | 196        | null       |
> | 20140207   | 3          | 1.353451312 | 168        | 168        | null       |
> | 20140207   | 4          | 1.2.3.6775 | 189        | 189        | null       |
> | 20140207   | 4          | 1.23.345345 | 217        | 217        | null       |
> | 20140207   | 4          | 1.23234234234 | 245        | 245        | null       |
> Add a print statement that logs the number of splits, like this:
> {code}
> public class ParallelIterators extends ExplainTable implements 
> ResultIterators {
> ....
>  @Override
>     public List<PeekingResultIterator> getIterators() throws SQLException {
>         boolean success = false;
>         final ConnectionQueryServices services = 
> context.getConnection().getQueryServices();
>         ReadOnlyProps props = services.getProps();
>         int numSplits = splits.size();
>         List<PeekingResultIterator> iterators = new 
> ArrayList<PeekingResultIterator>(numSplits);
>         List<Pair<byte[],Future<PeekingResultIterator>>> futures = new 
> ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits);
>         final UUID scanId = UUID.randomUUID();
>         try {
>             ExecutorService executor = services.getExecutor();
>             System.out.println("the split size is " + numSplits);
>              ....
>      }
> }
> {code}
> Then execute some SQL queries (each query is followed by the split size that was logged for it):
> {code}
> select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
> = '2' and spm like '1.%'
> the split size is 31
> select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
> = '2'
> the split size is 31
> select * from table1 where gmt > '20140202' and gmt < '20140207'
> the split size is 27
> select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
> = '2' and spm like '1.%'
> the split size is 28
> select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type 
> = '2'
> the split size is 28
> select * from table1 where gmt > '20140202' and gmt < '20140204'
> the split size is 12
> {code}
> but I think 
> {code}
> select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type 
> = '2' and spm like '1.%'
> {code}
> and 
> {code}
> select * from table1 where gmt > '20140202' and gmt < '20140207' 
> {code}
> these two SQL queries should have the same splits, but they do not (31 splits vs. 27 splits). Why?



--
This message was sent by Atlassian JIRA
(v6.2#6252)

[jira] [Updated] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational

Reply via email to