[
https://issues.apache.org/jira/browse/DRILL-4553?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15217288#comment-15217288
]
Anton Fernando commented on DRILL-4553:
---------------------------------------
This is over JSON and CSV, in this scenario the security metadata is in csv and
the first view is created over it. Views 2 and 3 are used to secure data in
JSON with the security metadata in csv. We are currently evaluating Drill to
see if it is a good fit to analyze healthcare data and we have run into this
issue. The explain plan for the query that is not returning data is as follows:
0: jdbc:drill:zk=localhost:2181> explain plan for select a.facilityidentifier,
a.encounteridentifier from dischargedetail a, dischargephysn b where
a.encounteridentifier=b.encounteridentifier and
a.facilityidentifier=b.facilityidentifier;
+------+------+
| text | json |
+------+------+
| 00-00 Screen
00-01 Project(facilityidentifier=[$0], encounteridentifier=[$1])
00-02 Project(facilityidentifier=[$1], encounteridentifier=[$0])
00-03 Project(EncounterIdentifier=[$2], FacilityIdentifier=[$3],
EncounterIdentifier0=[$0], FacilityIdentifier0=[$1])
00-04 HashJoin(condition=[AND(=($2, $0), =($3, $1))],
joinType=[inner])
00-06 Project(EncounterIdentifier=[$0], FacilityIdentifier=[$1])
00-08 HashJoin(condition=[AND(=($1, $13), =($2, $14))],
joinType=[inner])
00-11 Project(EncounterIdentifier=[$0],
FacilityIdentifier=[$1], SettingOfCare=[$2], ITEM=[ITEM($3,
'MedicalProfessionalIdentifierRaw')], ITEM4=[ITEM($3,
'MedicalProfessionalRoleCodeRaw')], ITEM5=[ITEM($3,
'MedicalProfessionalRoleCode')], ITEM6=[ITEM($3, 'FirstNameRaw')],
ITEM7=[ITEM($3, 'LastNameRaw')], ITEM8=[ITEM($3, 'MiddleNameRaw')],
ITEM9=[ITEM($3, 'MedicalProfessionalPrimarySpecialtyRaw')], ITEM10=[ITEM($3,
'MedicalProfessionalSecondarySpecialtyRaw')], ITEM11=[ITEM($3,
'NationalProviderIdentifierRaw')], ITEM12=[ITEM($3,
'UniformProviderIdentifierRaw')])
00-14 Flatten(flattenField=[$3])
00-17 Project(EncounterIdentifier=[$0],
FacilityIdentifier=[ITEM($1, 'FacilityIdentifier')], SettingOfCare=[$2],
MedicalProfessionals=[$3])
00-21 Scan(groupscan=[EasyGroupScan
[selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/json, numFiles=3,
columns=[`EncounterIdentifier`, `Facility`.`FacilityIdentifier`,
`SettingOfCare`, `MedicalProfessionals`],
files=[hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json,
hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json,
hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json]]])
00-10 Project(FacilityIdentifier0=[$0], SettingOfCare0=[$1])
00-13 Project(FacilityIdentifier=[$1], SettingOfCare=[$2])
00-16 SelectionVectorRemover
00-20 Filter(condition=[=($0, UPPER(USER))])
00-24 Project(username=[ITEM($0, 0)],
FacilityIdentifier=[ITEM($0, 1)], SettingOfCare=[ITEM($0, 2)])
00-26 Scan(groupscan=[EasyGroupScan
[selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/security, numFiles=1,
columns=[`columns`[0], `columns`[1], `columns`[2]],
files=[hdfs://sandbox.hortonworks.com:8020/tmp/security/lake_data_security.csv]]])
00-05 Project(EncounterIdentifier0=[$0], FacilityIdentifier0=[$1])
00-07 Project(EncounterIdentifier=[$1], FacilityIdentifier=[$2])
00-09 SelectionVectorRemover
00-12 Filter(condition=[=($2, $3)])
00-15 HashJoin(condition=[=($0, $4)], joinType=[inner])
00-19 Project(SettingOfCare=[$0],
EncounterIdentifier=[$1], ITEM=[ITEM($2, 'FacilityIdentifier')])
00-23 Scan(groupscan=[EasyGroupScan
[selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/json, numFiles=3,
columns=[`SettingOfCare`, `EncounterIdentifier`,
`Facility`.`FacilityIdentifier`],
files=[hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json,
hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json,
hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json]]])
00-18 SelectionVectorRemover
00-22 Filter(condition=[=($0, UPPER(USER))])
00-25 Project(username=[ITEM($0, 0)],
FacilityIdentifier=[ITEM($0, 1)])
00-27 Scan(groupscan=[EasyGroupScan
[selectionRoot=hdfs://sandbox.hortonworks.com:8020/tmp/security, numFiles=1,
columns=[`columns`[0], `columns`[1]],
files=[hdfs://sandbox.hortonworks.com:8020/tmp/security/lake_data_security.csv]]])
| {
"head" : {
"version" : 1,
"generator" : {
"type" : "ExplainHandler",
"info" : ""
},
"type" : "APACHE_DRILL_PHYSICAL",
"options" : [ ],
"queue" : 0,
"resultMode" : "EXEC"
},
"graph" : [ {
"pop" : "fs-scan",
"@id" : 21,
"userName" : "root",
"files" : [
"hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json",
"hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json",
"hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "hdfs://sandbox.hortonworks.com:8020",
"config" : null,
"workspaces" : {
"discharges" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : "avro"
}
},
"formats" : {
"avro" : {
"type" : "avro"
},
"json" : {
"type" : "json",
"extensions" : [ "json" ]
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
}
}
},
"format" : {
"type" : "json",
"extensions" : [ "json" ]
},
"columns" : [ "`EncounterIdentifier`", "`Facility`.`FacilityIdentifier`",
"`SettingOfCare`", "`MedicalProfessionals`" ],
"selectionRoot" : "hdfs://sandbox.hortonworks.com:8020/tmp/json",
"cost" : 2927.0
}, {
"pop" : "project",
"@id" : 17,
"exprs" : [ {
"ref" : "`EncounterIdentifier`",
"expr" : "`EncounterIdentifier`"
}, {
"ref" : "`FacilityIdentifier`",
"expr" : "`Facility`.`FacilityIdentifier`"
}, {
"ref" : "`SettingOfCare`",
"expr" : "`SettingOfCare`"
}, {
"ref" : "`MedicalProfessionals`",
"expr" : "`MedicalProfessionals`"
} ],
"child" : 21,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 2927.0
}, {
"pop" : "flatten",
"@id" : 14,
"child" : 17,
"column" : "`MedicalProfessionals`",
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 2927.0
}, {
"pop" : "project",
"@id" : 11,
"exprs" : [ {
"ref" : "`EncounterIdentifier`",
"expr" : "`EncounterIdentifier`"
}, {
"ref" : "`FacilityIdentifier`",
"expr" : "`FacilityIdentifier`"
}, {
"ref" : "`SettingOfCare`",
"expr" : "`SettingOfCare`"
}, {
"ref" : "`ITEM`",
"expr" : "`MedicalProfessionals`.`MedicalProfessionalIdentifierRaw`"
}, {
"ref" : "`ITEM4`",
"expr" : "`MedicalProfessionals`.`MedicalProfessionalRoleCodeRaw`"
}, {
"ref" : "`ITEM5`",
"expr" : "`MedicalProfessionals`.`MedicalProfessionalRoleCode`"
}, {
"ref" : "`ITEM6`",
"expr" : "`MedicalProfessionals`.`FirstNameRaw`"
}, {
"ref" : "`ITEM7`",
"expr" : "`MedicalProfessionals`.`LastNameRaw`"
}, {
"ref" : "`ITEM8`",
"expr" : "`MedicalProfessionals`.`MiddleNameRaw`"
}, {
"ref" : "`ITEM9`",
"expr" : "`MedicalProfessionals`.`MedicalProfessionalPrimarySpecialtyRaw`"
}, {
"ref" : "`ITEM10`",
"expr" :
"`MedicalProfessionals`.`MedicalProfessionalSecondarySpecialtyRaw`"
}, {
"ref" : "`ITEM11`",
"expr" : "`MedicalProfessionals`.`NationalProviderIdentifierRaw`"
}, {
"ref" : "`ITEM12`",
"expr" : "`MedicalProfessionals`.`UniformProviderIdentifierRaw`"
} ],
"child" : 14,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 2927.0
}, {
"pop" : "fs-scan",
"@id" : 23,
"userName" : "root",
"files" : [
"hdfs://sandbox.hortonworks.com:8020/tmp/json/403Encounters.json",
"hdfs://sandbox.hortonworks.com:8020/tmp/json/404Encounters.json",
"hdfs://sandbox.hortonworks.com:8020/tmp/json/405Encounters.json" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "hdfs://sandbox.hortonworks.com:8020",
"config" : null,
"workspaces" : {
"discharges" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : "avro"
}
},
"formats" : {
"avro" : {
"type" : "avro"
},
"json" : {
"type" : "json",
"extensions" : [ "json" ]
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
}
}
},
"format" : {
"type" : "json",
"extensions" : [ "json" ]
},
"columns" : [ "`SettingOfCare`", "`EncounterIdentifier`",
"`Facility`.`FacilityIdentifier`" ],
"selectionRoot" : "hdfs://sandbox.hortonworks.com:8020/tmp/json",
"cost" : 2927.0
}, {
"pop" : "project",
"@id" : 19,
"exprs" : [ {
"ref" : "`SettingOfCare`",
"expr" : "`SettingOfCare`"
}, {
"ref" : "`EncounterIdentifier`",
"expr" : "`EncounterIdentifier`"
}, {
"ref" : "`ITEM`",
"expr" : "`Facility`.`FacilityIdentifier`"
} ],
"child" : 23,
"initialAllocation" : 1000000,
"maxAllocation" : 10000000000,
"cost" : 2927.0
}, {
"pop" : "fs-scan",
"@id" : 27,
"userName" : "root",
"files" : [
"hdfs://sandbox.hortonworks.com:8020/tmp/security/lake_data_security.csv" ],
"storage" : {
"type" : "file",
"enabled" : true,
"connection" : "hdfs://sandbox.hortonworks.com:8020",
"config" : null,
"workspaces" : {
"discharges" : {
"location" : "/tmp",
"writable" : true,
"defaultInputFormat" : "avro"
}
},
"formats" : {
"avro" : {
"type" : "avro"
},
"json" : {
"type" : "json",
"extensions" : [ "json" ]
},
"csv" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
}
}
},
"format" : {
"type" : "text",
"extensions" : [ "csv" ],
"delimiter" : ","
},
"columns" : [ "`columns`[0]", "`columns`[1]" ],
"selectionRoot" : "hdfs://sandbox.hortonworks.co |
+------+------+
1 row selected (3.622 seconds)
> Joins using views are not returning results.
> --------------------------------------------
>
> Key: DRILL-4553
> URL: https://issues.apache.org/jira/browse/DRILL-4553
> Project: Apache Drill
> Issue Type: Bug
> Affects Versions: 1.5.0, 1.6.0
> Reporter: Anton Fernando
> Priority: Critical
>
> I have the following three views:
> create view view1 as select ..... from .... where username=user;
> create view view2 as select ..... from view1 as a, <file1> as b where a.col1
> = b.col1;
> create view view3 as select ..... from view1 as a, <file2> as b where a.col1
> = b.col1;
> A select * from each of these views works fine and returns the expected
> results. A self join on view2 and view3 also works fine. However when view2
> and view3 are joined on common keys there are no rows returned.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)