[
https://issues.apache.org/jira/browse/PHOENIX-6364?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Istvan Toth resolved PHOENIX-6364.
----------------------------------
Resolution: Not A Bug
> CSVBulkload will cause duplicate data to be queried when a global index is
> created for each field in the table.
> ---------------------------------------------------------------------------------------------------------------
>
> Key: PHOENIX-6364
> URL: https://issues.apache.org/jira/browse/PHOENIX-6364
> Project: Phoenix
> Issue Type: Bug
> Components: core
> Affects Versions: 4.13.1, 5.0.0
> Reporter: XiaShuangQi
> Priority: Major
>
> HBase version :1.3.1
> phoenix version: apache-phoenix-4.13.0-HBase-1.3
> (download from [http://phoenix.apache.org/download.html])
> phoneinx client version: apache-phoenix-4.13.0-HBase-1.3
> (download from [http://phoenix.apache.org/download.html])
> step 1:create table
> 0: jdbc:phoenix> create table testtable3(
> . . . . . . . .> DATE varchar not null,
> . . . . . . . .> NUM integer not null,
> . . . . . . . .> SEQ_NUM integer not null,
> . . . . . . . .> ACCOUNT1 varchar not null,
> . . . . . . . .> ACCOUNTDES varchar,
> . . . . . . . .> FLAG varchar,
> . . . . . . . .> SALL DOUBLE,
> . . . . . . . .> CONSTRAINT PK PRIMARY KEY (DATE,NUM,SEQ_NUM,ACCOUNT1)
> . . . . . . . .> );
> step 2: upsert data with primary key
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201001',30201001,13,'367392332','sffa1','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201002',30201002,14,'367392333','sffa2','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201003',30201003,15,'367392334','sffa3','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201004',30201004,16,'367392335','sffa4','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201005',30201005,17,'367392336','sffa5','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201006',30201006,18,'367392337','sffa6','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201007',30201007,19,'367392338','sffa7','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201008',30201008,20,'367392339','sffa8','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201009',30201009,21,'367392340','sffa9','','');
> UPSERT INTO testtable3 (DATE,NUM,SEQ_NUM,ACCOUNT1,ACCOUNTDES,FLAG,SALL)
> values ('20201010',30201010,22,'367392341','sffa10','','');
> step 3: create global index ,more than primary key
> CREATE INDEX testtable3_ID ON testtable3
> (ACCOUNT1,DATE,NUM,ACCOUNTDES,SEQ_NUM);
> step 3: CSVBulkload data,primary key same as before but other filed
> different
> |20201001|30201001|13|367392332|sffa2|1231243|23|
> |20201002|30201002|14|367392333|sffa3|1231244|24|
> |20201003|30201003|15|367392334|sffa4|1231245|25|
> |20201004|30201004|16|367392335|sffa5|1231246|26|
> |20201005|30201005|17|367392336|sffa6|1231247|27|
> |20201006|30201006|18|367392337|sffa7|1231248|28|
> |20201007|30201007|19|367392338|sffa8|1231249|29|
> |20201008|30201008|20|367392339|sffa9|1231250|30|
> |20201009|30201009|21|367392340|sffa10|1231251|31|
> |20201010|30201010|22|367392341|sffa11|1231252|32|
> step 4:select data
> select DATE,NUM,SEQ_NUM,ACCOUNT1 from testtable3;
> +------------+----------++----------------------+
> |DATE|NUM|SEQ_NUM|ACCOUNT1|
> +------------+----------++----------------------+
> |20201001|20201001|13|367392332|
> |20201001|30201001|13|367392332|
> |20201001|30201001|13|367392332|
> |20201002|30201002|14|367392333|
> |20201002|30201002|14|367392333|
> |20201003|30201003|15|367392334|
> |20201003|30201003|15|367392334|
> |20201004|30201004|16|367392335|
> |20201004|30201004|16|367392335|
> |20201005|30201005|17|367392336|
> |20201005|30201005|17|367392336|
> |20201006|30201006|18|367392337|
> |20201006|30201006|18|367392337|
> |20201007|30201007|19|367392338|
> |20201007|30201007|19|367392338|
> |20201008|30201008|20|367392339|
> |20201008|30201008|20|367392339|
> |20201009|30201009|21|367392340|
> |20201009|30201009|21|367392340|
> |20201010|30201010|22|367392341|
> |20201010|30201010|22|367392341|
> +------------+----------++----------------------+
> and we can see index data :
> 0: jdbc:phoenix> select * from testtable3_ID;
> 2021-02-04 19:50:48,685 | INFO | hconnection-0x3943a2be-shared--pool1-t352 |
> RPC Server Kerberos principal name for service=ClientService is
> hbase/[email protected] |
> org.apache.hadoop.hbase.ipc.RpcClientImpl$Connection.processPreambleResponse(RpcClientImpl.java:824)
> 2021-02-04 19:50:48,699 | INFO | hconnection-0x3943a2be-shared--pool1-t353 |
> RPC Server Kerberos principal name for service=ClientService is
> hbase/[email protected] |
> org.apache.hadoop.hbase.ipc.RpcClientImpl$Connection.processPreambleResponse(RpcClientImpl.java:824)
> +-------------+----------++--------------------------++-----------
> |:ACCOUNT1|:DATE|:NUM|0:ACCOUNTDES|:SEQ_NUM|
> +-------------+----------++--------------------------++-----------
> |367392332|20201001|20201001|sffa1|13|
> |367392332|20201001|30201001|sffa1|13|
> |367392332|20201001|30201001|sffa2|13|
> |367392333|20201002|30201002|sffa2|14|
> |367392333|20201002|30201002|sffa3|14|
> |367392334|20201003|30201003|sffa3|15|
> |367392334|20201003|30201003|sffa4|15|
> |367392335|20201004|30201004|sffa4|16|
> |367392335|20201004|30201004|sffa5|16|
> |367392336|20201005|30201005|sffa5|17|
> |367392336|20201005|30201005|sffa6|17|
> |367392337|20201006|30201006|sffa6|18|
> |367392337|20201006|30201006|sffa7|18|
> |367392338|20201007|30201007|sffa7|19|
> |367392338|20201007|30201007|sffa8|19|
> |367392339|20201008|30201008|sffa8|20|
> |367392339|20201008|30201008|sffa9|20|
> |367392340|20201009|30201009|sffa10|21|
> |367392340|20201009|30201009|sffa9|21|
> |367392341|20201010|30201010|sffa10|22|
> |367392341|20201010|30201010|sffa11|22|
> +-------------+----------++--------------------------++-----------
> or scan with hbase shell:
> hbase(main):007:0> scan "TESTTABLE3_ID"
> ROW COLUMN+CELL
> 2021-02-04 20:20:40,157 | INFO | hconnection-0x713a35c5-shared--pool1-t3 |
> RPC Server Kerberos principal name for service=ClientService is
> hbase/[email protected] |
> org.apache.hadoop.hbase.ipc.RpcClientImpl$Connection.processPreambleResponse(RpcClientImpl.java:824)
> 367392332\x0020201001\x00\x814>)sffa1\x00\x80\x00\x00\x0D column=0:_0,
> timestamp=1612438808215, value=x
> 367392332\x0020201001\x00\x81\xCC\xD4\xA9sffa1\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x0D
> 367392332\x0020201001\x00\x81\xCC\xD4\xA9sffa2\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x0D
> 367392333\x0020201002\x00\x81\xCC\xD4\xAAsffa2\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x0E
> 367392333\x0020201002\x00\x81\xCC\xD4\xAAsffa3\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x0E
> 367392334\x0020201003\x00\x81\xCC\xD4\xABsffa3\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x0F
> 367392334\x0020201003\x00\x81\xCC\xD4\xABsffa4\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x0F
> 367392335\x0020201004\x00\x81\xCC\xD4\xACsffa4\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x10
> 367392335\x0020201004\x00\x81\xCC\xD4\xACsffa5\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x10
> 367392336\x0020201005\x00\x81\xCC\xD4\xADsffa5\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x11
> 367392336\x0020201005\x00\x81\xCC\xD4\xADsffa6\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x11
> 367392337\x0020201006\x00\x81\xCC\xD4\xAEsffa6\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x12
> 367392337\x0020201006\x00\x81\xCC\xD4\xAEsffa7\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x12
> 367392338\x0020201007\x00\x81\xCC\xD4\xAFsffa7\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x13
> 367392338\x0020201007\x00\x81\xCC\xD4\xAFsffa8\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x13
> 367392339\x0020201008\x00\x81\xCC\xD4\xB0sffa8\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x14
> 367392339\x0020201008\x00\x81\xCC\xD4\xB0sffa9\x00\x80\x00\ column=0:_0,
> timestamp=1612439432910, value=_0
> x00\x14
> 367392340\x0020201009\x00\x81\xCC\xD4\xB1sffa10\x00\x80\x00 column=0:_0,
> timestamp=1612439432910, value=_0
> \x00\x15
> 367392340\x0020201009\x00\x81\xCC\xD4\xB1sffa9\x00\x80\x00\ column=0:_0,
> timestamp=1612438808215, value=x
> x00\x15
> 367392341\x0020201010\x00\x81\xCC\xD4\xB2sffa10\x00\x80\x00 column=0:_0,
> timestamp=1612438808215, value=x
> \x00\x16
> 367392341\x0020201010\x00\x81\xCC\xD4\xB2sffa11\x00\x80\x00 column=0:_0,
> timestamp=1612439432910, value=_0
> \x00\x16
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)