Reviewing version control logs*, I find that the motivation was indeed 
performance, though it's ETL-time performance, not query-time performance.

The patient_dimension that we modified was in our identified staging copy, not 
our de-identified run-time copy.

We use the concept_dimension for all our facts. We tried using the 
visit_dimension for age-at-visit, but we realized our patient/fact counting 
code<https://informatics.kumc.edu/work/browser/heron_load/concept_stats.sql> 
doesn't handle it.


* Details attached. Would that our whole version control history were still 
online publicly... sigh.

--
Dan

________________________________
From: Greater Plains Collaborative Software Development 
[[email protected]] on behalf of Tom Mish [[email protected]]
Sent: Tuesday, February 04, 2014 10:22 AM
To: [email protected]
Subject: Re: optional columns in i2b2 dimension tables RE: Minutes of GPV-DEV 
call 20140128

Great discussion all... I'm curious about the reasoning process that led to the 
decision to add optional columns to the patient dimension table. Mostly, I'm 
curious about performance...
# HG changeset patch
# User Dan Connolly <[email protected]>
# Date 1318524624 18000
#      Thu Oct 13 11:50:24 2011 -0500
# Node ID 6a5d319707b5044c961e2841428e81780b937762
# Parent  fbfa9eecb02a20076d3ab45859bc25383572f795
opimization: store SSN in (identified) patient_dimension (#619)
  rather than going back to @epic to get it.

diff --git a/heron_load/epic_dimensions_load.sql b/heron_load/epic_dimensions_load.sql
--- a/heron_load/epic_dimensions_load.sql
+++ b/heron_load/epic_dimensions_load.sql
@@ -210,9 +210,13 @@
  drop
  (zip_cd) ;
 
-/* Add date shift column. */
+/* Add date shift, ssn columns.
+Do them in independent statements in case only one is missing.
+ */
 alter table NightHerondata.patient_dimension
-  add date_shift number;
+  add (date_shift number) ;
+alter table NightHerondata.patient_dimension
+  add (ssn varchar2(45)) ;
 whenever sqlerror exit;
 
 whenever sqlerror continue;
@@ -228,12 +232,13 @@
 insert into NightHerondata.patient_dimension (
   PATIENT_NUM, VITAL_STATUS_CD, BIRTH_DATE, DEATH_DATE, sex_cd
 , language_cd, race_cd, marital_status_cd, religion_cd
-, date_shift
+, date_shift, ssn
 , import_date, upload_id, download_date, sourcesystem_cd )
 (SELECT 
    pmap.patient_num, pd.VITAL_STATUS_CD, pd.BIRTH_DATE, pd.DEATH_DATE
  ,pd.sex_cd,  pd.language_cd, pd.race_cd, pd.marital_status_cd, pd.religion_cd
  , round(dbms_random.value(-364,0))
+ , pd.ssn
  , sysdate, up.upload_id, :download_date, up.source_cd
  from NightHerondata.upload_status up,
     patient_dimension@epic pd
diff --git a/heron_load/ssdmf_load.sql b/heron_load/ssdmf_load.sql
--- a/heron_load/ssdmf_load.sql
+++ b/heron_load/ssdmf_load.sql
@@ -26,10 +26,6 @@
 /* Check for dmf table (in KUMC database). */
 select * from dmf.dmf_table@kumc where 1=0;
 
-/* Check for Epic clarity connection, views. */
-select * from clarity.patient@epic where 1=0;
-select * from patient_dimension@epic where 1=0;
-
 /* make some test data
 
 drop table clarityb_test_data;
@@ -67,7 +63,7 @@
 
 create global temporary table ssn_dob_match
 on commit preserve rows as
-select pm.patient_num
+select pd.patient_num
      , mod(ora_hash(dmf.social_security_number), &&heron_etl_chunks)+1 as part
      , dmf.social_security_number as ssn
      , NightHeronData.SQ_UP_ENCDIM_ENCOUNTERNUM.nextval as encounter_num
@@ -77,13 +73,8 @@
         else date_of_death
             end, 'mmddyyyy') as observation_date
 from dmf.dmf_table@kumc dmf
-join patient_dimension@epic pd
+join NightHeronData.patient_dimension pd
    on dmf.social_security_number = replace(pd.ssn, '-', '')
-join NightHeronData.patient_mapping pm
-   on pm.patient_ide = pd.patient_ide
-    and pm.patient_ide_status = 'A'
-    and pm.patient_ide_source = (
-      select source_cd from epic_audit_info)
 where
   dmf.date_of_birth = (case
     when substr(date_of_birth, 3, 2) = '00'

Reply via email to