[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational
[ https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14107118#comment-14107118 ] James Taylor commented on PHOENIX-1074: --- [~jaywong] - we found and fixed several issues with the intersect method that may have been causing you problems. If you have a chance, would you mind trying it with our latest 3.1 RC2 and/or 4.1 RC0? Also, FWIW, we're improving on our parallelization algorithm in 4.2: see PHOENIX-180. ParallelIteratorRegionSplitterFactory get Splits is not rational Key: PHOENIX-1074 URL: https://issues.apache.org/jira/browse/PHOENIX-1074 Project: Phoenix Issue Type: Bug Reporter: jay wong Attachments: SkipScanFilterSaltedIntersectTest.java create a table {code} create table if not exists table1( gmt VARCHAR NOT NULL, spm_type VARCHAR NOT NULL, spm VARCHAR NOT NULL, A.int_a INTEGER, B.int_b INTEGER, B.int_c INTEGER CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, bloomfilter='ROW'; {code} and made the table 29 partitions as this. 
|startrow|endrow| | |\x0020140201| |\x0020140201|\x0020140202| |\x0020140202|\x0020140203| |\x0020140203|\x0020140204| |\x0020140204|\x0020140205| |\x0020140205|\x0020140206| |\x0020140206|\x0020140207| |\x0020140207|\x0120140201| |\x0120140201|\x0120140202| |\x0120140202|\x0120140203| |\x0120140203|\x0120140204| |\x0120140204|\x0120140205| |\x0120140205|\x0120140206| |\x0120140206|\x0120140207| |\x0120140207|\x0220140201| |\x0220140201|\x0220140202| |\x0220140202|\x0220140203| |\x0220140203|\x0220140204| |\x0220140204|\x0220140205| |\x0220140205|\x0220140206| |\x0220140206|\x0220140207| |\x0220140207|\x0320140201| |\x0320140201|\x0320140202| |\x0320140202|\x0320140203| |\x0320140203|\x0320140204| |\x0320140204|\x0320140205| |\x0320140205|\x0320140206| |\x0320140206|\x0320140207| |\x0320140207| | Then insert some data; |GMT | SPM_TYPE |SPM | INT_A| INT_B| INT_C | | 20140201 | 1 | 1.2.3.4546 | 218| 218| null | | 20140201 | 1 | 1.2.44545 | 190| 190| null | | 20140201 | 1 | 1.353451312 | 246| 246| null | | 20140201 | 2 | 1.2.3.6775 | 183| 183| null | |...|...|...|...|...|...| | 20140207 | 3 | 1.2.3.4546 | 224| 224| null | | 20140207 | 3 | 1.2.44545 | 196| 196| null | | 20140207 | 3 | 1.353451312 | 168| 168| null | | 20140207 | 4 | 1.2.3.6775 | 189| 189| null | | 20140207 | 4 | 1.23.345345 | 217| 217| null | | 20140207 | 4 | 1.23234234234 | 245| 245| null | print a log like this {code} public class ParallelIterators extends ExplainTable implements ResultIterators { @Override public ListPeekingResultIterator getIterators() throws SQLException { boolean success = false; final ConnectionQueryServices services = context.getConnection().getQueryServices(); ReadOnlyProps props = services.getProps(); int numSplits = splits.size(); ListPeekingResultIterator iterators = new ArrayListPeekingResultIterator(numSplits); ListPairbyte[],FuturePeekingResultIterator futures = new ArrayListPairbyte[],FuturePeekingResultIterator(numSplits); final UUID scanId = UUID.randomUUID(); try { 
ExecutorService executor = services.getExecutor(); System.out.println("the split size is " + numSplits); } } {code} then execute some sql {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' the split size is 27 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' and spm like '1.%' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' the split size is 12 {code} but I think {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' {code} and {code} select * from table1 where gmt > '20140202' and gmt < '20140207' {code} the two SQL statements should have the same splits, but why not? -- This message was sent by Atlassian JIRA
[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational
[ https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14068585#comment-14068585 ] jay wong commented on PHOENIX-1074: --- [~jamestaylor] add a test case. the second testcase can attest the problem I know the rule is hard to correct. but it's not advisable ParallelIteratorRegionSplitterFactory get Splits is not rational Key: PHOENIX-1074 URL: https://issues.apache.org/jira/browse/PHOENIX-1074 Project: Phoenix Issue Type: Bug Reporter: jay wong Attachments: SkipScanFilterSaltedIntersectTest.java create a table {code} create table if not exists table1( gmt VARCHAR NOT NULL, spm_type VARCHAR NOT NULL, spm VARCHAR NOT NULL, A.int_a INTEGER, B.int_b INTEGER, B.int_c INTEGER CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, bloomfilter='ROW'; {code} and made the table 29 partitions as this. |startrow|endrow| | |\x0020140201| |\x0020140201|\x0020140202| |\x0020140202|\x0020140203| |\x0020140203|\x0020140204| |\x0020140204|\x0020140205| |\x0020140205|\x0020140206| |\x0020140206|\x0020140207| |\x0020140207|\x0120140201| |\x0120140201|\x0120140202| |\x0120140202|\x0120140203| |\x0120140203|\x0120140204| |\x0120140204|\x0120140205| |\x0120140205|\x0120140206| |\x0120140206|\x0120140207| |\x0120140207|\x0220140201| |\x0220140201|\x0220140202| |\x0220140202|\x0220140203| |\x0220140203|\x0220140204| |\x0220140204|\x0220140205| |\x0220140205|\x0220140206| |\x0220140206|\x0220140207| |\x0220140207|\x0320140201| |\x0320140201|\x0320140202| |\x0320140202|\x0320140203| |\x0320140203|\x0320140204| |\x0320140204|\x0320140205| |\x0320140205|\x0320140206| |\x0320140206|\x0320140207| |\x0320140207| | Then insert some data; |GMT | SPM_TYPE |SPM | INT_A| INT_B| INT_C | | 20140201 | 1 | 1.2.3.4546 | 218| 218| null | | 20140201 | 1 | 1.2.44545 | 190| 190| null | | 20140201 | 1 | 1.353451312 | 246| 246| null | | 20140201 | 2 | 1.2.3.6775 | 183| 183| null | |...|...|...|...|...|...| | 20140207 | 
3 | 1.2.3.4546 | 224| 224| null | | 20140207 | 3 | 1.2.44545 | 196| 196| null | | 20140207 | 3 | 1.353451312 | 168| 168| null | | 20140207 | 4 | 1.2.3.6775 | 189| 189| null | | 20140207 | 4 | 1.23.345345 | 217| 217| null | | 20140207 | 4 | 1.23234234234 | 245| 245| null | print a log like this {code} public class ParallelIterators extends ExplainTable implements ResultIterators { @Override public List<PeekingResultIterator> getIterators() throws SQLException { boolean success = false; final ConnectionQueryServices services = context.getConnection().getQueryServices(); ReadOnlyProps props = services.getProps(); int numSplits = splits.size(); List<PeekingResultIterator> iterators = new ArrayList<PeekingResultIterator>(numSplits); List<Pair<byte[],Future<PeekingResultIterator>>> futures = new ArrayList<Pair<byte[],Future<PeekingResultIterator>>>(numSplits); final UUID scanId = UUID.randomUUID(); try { ExecutorService executor = services.getExecutor(); System.out.println("the split size is " + numSplits); } } {code} then execute some sql {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' the split size is 27 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' and spm like '1.%' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' the split size is 12 {code} but I think {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' {code} and {code} select * from table1 where gmt > '20140202' and gmt < '20140207' {code} the two SQL statements should have the same splits, but why not? -- This message was sent by Atlassian JIRA (v6.2#6252)
[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational
[ https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14057369#comment-14057369 ] jay wong commented on PHOENIX-1074: --- [~jamestaylor] please check my problem again. This is my primary key and salt_buckets. {code} CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4 {code} {code} select * from table1 where gmt > '20140202' and gmt < '20140204' the split size is 12 (is logical) {code} {code} select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' the split size is 28 (I think a split size of 12 would also be logical here) {code} This is only an epitome. My online table has 1900 regions. If it ran with a logical split policy, it would have only about 20 splits, BUT it has 1900 splits. ParallelIteratorRegionSplitterFactory get Splits is not rational Key: PHOENIX-1074 URL: https://issues.apache.org/jira/browse/PHOENIX-1074 Project: Phoenix Issue Type: Bug Reporter: jay wong create a table {code} create table if not exists table1( gmt VARCHAR NOT NULL, spm_type VARCHAR NOT NULL, spm VARCHAR NOT NULL, A.int_a INTEGER, B.int_b INTEGER, B.int_c INTEGER CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, bloomfilter='ROW'; {code} and made the table 29 partitions as this. 
|startrow|endrow| | |\x0020140201| |\x0020140201|\x0020140202| |\x0020140202|\x0020140203| |\x0020140203|\x0020140204| |\x0020140204|\x0020140205| |\x0020140205|\x0020140206| |\x0020140206|\x0020140207| |\x0020140207|\x0120140201| |\x0120140201|\x0120140202| |\x0120140202|\x0120140203| |\x0120140203|\x0120140204| |\x0120140204|\x0120140205| |\x0120140205|\x0120140206| |\x0120140206|\x0120140207| |\x0120140207|\x0220140201| |\x0220140201|\x0220140202| |\x0220140202|\x0220140203| |\x0220140203|\x0220140204| |\x0220140204|\x0220140205| |\x0220140205|\x0220140206| |\x0220140206|\x0220140207| |\x0220140207|\x0320140201| |\x0320140201|\x0320140202| |\x0320140202|\x0320140203| |\x0320140203|\x0320140204| |\x0320140204|\x0320140205| |\x0320140205|\x0320140206| |\x0320140206|\x0320140207| |\x0320140207| | Then insert some data; |GMT | SPM_TYPE |SPM | INT_A| INT_B| INT_C | | 20140201 | 1 | 1.2.3.4546 | 218| 218| null | | 20140201 | 1 | 1.2.44545 | 190| 190| null | | 20140201 | 1 | 1.353451312 | 246| 246| null | | 20140201 | 2 | 1.2.3.6775 | 183| 183| null | |...|...|...|...|...|...| | 20140207 | 3 | 1.2.3.4546 | 224| 224| null | | 20140207 | 3 | 1.2.44545 | 196| 196| null | | 20140207 | 3 | 1.353451312 | 168| 168| null | | 20140207 | 4 | 1.2.3.6775 | 189| 189| null | | 20140207 | 4 | 1.23.345345 | 217| 217| null | | 20140207 | 4 | 1.23234234234 | 245| 245| null | print a log like this {code} public class ParallelIterators extends ExplainTable implements ResultIterators { @Override public ListPeekingResultIterator getIterators() throws SQLException { boolean success = false; final ConnectionQueryServices services = context.getConnection().getQueryServices(); ReadOnlyProps props = services.getProps(); int numSplits = splits.size(); ListPeekingResultIterator iterators = new ArrayListPeekingResultIterator(numSplits); ListPairbyte[],FuturePeekingResultIterator futures = new ArrayListPairbyte[],FuturePeekingResultIterator(numSplits); final UUID scanId = UUID.randomUUID(); try { 
ExecutorService executor = services.getExecutor(); System.out.println("the split size is " + numSplits); } } {code} then execute some sql {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' the split size is 27 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' and spm like '1.%' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' the split size is 12 {code} but I think {code} select * from table1 where gmt > '20140202' and gmt
[jira] [Commented] (PHOENIX-1074) ParallelIteratorRegionSplitterFactory get Splits is not rational
[ https://issues.apache.org/jira/browse/PHOENIX-1074?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14057436#comment-14057436 ] James Taylor commented on PHOENIX-1074: --- The second query is using a skip scan because there's range information for all columns in your PK: {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' {code} So it'll run the skip scan over all regions since the table is salted. How is performance for this query? You can force it to do a range scan with a hint like this: {code} select /*+ RANGE_SCAN */ * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' {code} Please let us know how performance compares between the two. ParallelIteratorRegionSplitterFactory get Splits is not rational Key: PHOENIX-1074 URL: https://issues.apache.org/jira/browse/PHOENIX-1074 Project: Phoenix Issue Type: Bug Reporter: jay wong create a table {code} create table if not exists table1( gmt VARCHAR NOT NULL, spm_type VARCHAR NOT NULL, spm VARCHAR NOT NULL, A.int_a INTEGER, B.int_b INTEGER, B.int_c INTEGER CONSTRAINT pk PRIMARY KEY (gmt, spm_type, spm)) SALT_BUCKETS = 4, bloomfilter='ROW'; {code} and made the table 29 partitions as this. 
|startrow|endrow| | |\x0020140201| |\x0020140201|\x0020140202| |\x0020140202|\x0020140203| |\x0020140203|\x0020140204| |\x0020140204|\x0020140205| |\x0020140205|\x0020140206| |\x0020140206|\x0020140207| |\x0020140207|\x0120140201| |\x0120140201|\x0120140202| |\x0120140202|\x0120140203| |\x0120140203|\x0120140204| |\x0120140204|\x0120140205| |\x0120140205|\x0120140206| |\x0120140206|\x0120140207| |\x0120140207|\x0220140201| |\x0220140201|\x0220140202| |\x0220140202|\x0220140203| |\x0220140203|\x0220140204| |\x0220140204|\x0220140205| |\x0220140205|\x0220140206| |\x0220140206|\x0220140207| |\x0220140207|\x0320140201| |\x0320140201|\x0320140202| |\x0320140202|\x0320140203| |\x0320140203|\x0320140204| |\x0320140204|\x0320140205| |\x0320140205|\x0320140206| |\x0320140206|\x0320140207| |\x0320140207| | Then insert some data; |GMT | SPM_TYPE |SPM | INT_A| INT_B| INT_C | | 20140201 | 1 | 1.2.3.4546 | 218| 218| null | | 20140201 | 1 | 1.2.44545 | 190| 190| null | | 20140201 | 1 | 1.353451312 | 246| 246| null | | 20140201 | 2 | 1.2.3.6775 | 183| 183| null | |...|...|...|...|...|...| | 20140207 | 3 | 1.2.3.4546 | 224| 224| null | | 20140207 | 3 | 1.2.44545 | 196| 196| null | | 20140207 | 3 | 1.353451312 | 168| 168| null | | 20140207 | 4 | 1.2.3.6775 | 189| 189| null | | 20140207 | 4 | 1.23.345345 | 217| 217| null | | 20140207 | 4 | 1.23234234234 | 245| 245| null | print a log like this {code} public class ParallelIterators extends ExplainTable implements ResultIterators { @Override public ListPeekingResultIterator getIterators() throws SQLException { boolean success = false; final ConnectionQueryServices services = context.getConnection().getQueryServices(); ReadOnlyProps props = services.getProps(); int numSplits = splits.size(); ListPeekingResultIterator iterators = new ArrayListPeekingResultIterator(numSplits); ListPairbyte[],FuturePeekingResultIterator futures = new ArrayListPairbyte[],FuturePeekingResultIterator(numSplits); final UUID scanId = UUID.randomUUID(); try { 
ExecutorService executor = services.getExecutor(); System.out.println("the split size is " + numSplits); } } {code} then execute some sql {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' and spm like '1.%' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' and spm_type = '2' the split size is 31 select * from table1 where gmt > '20140202' and gmt < '20140207' the split size is 27 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' and spm like '1.%' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' and spm_type = '2' the split size is 28 select * from table1 where gmt > '20140202' and gmt < '20140204' the split size is 12 {code} but I think {code} select * from table1 where gmt > '20140202' and gmt < '20140207' and