The following commit has been merged in the master branch:
commit ee9ef082b050fcf1d4f6b54f05011a457f9931b1
Author: Andrei Zavada <[email protected]>
Date:   Fri Apr 12 19:57:18 2013 +0300

    parse and look at individual EDF patient_id fields (patch 1/2)

diff --git a/src/expdesign/recording.cc b/src/expdesign/recording.cc
index 704ba97..aa8ac62 100644
--- a/src/expdesign/recording.cc
+++ b/src/expdesign/recording.cc
@@ -130,7 +130,7 @@ CProfile (CSubject& J, const string& d, const sigfile::SChannel& h,
              // anchor zero page, get pagesize from edf^W CBinnedPower^W either goes
                time_t dima = F.start_time();
                printf( "CProfile::CProfile(): adding %s of [%s, %s, %s] %zu pages (%zu full, %zu in hypnogram) recorded %s",
-                       metrics::name(params.metric), F.subject(), F.session(), F.episode(),
+                       metrics::name(params.metric), F.id.c_str(), F.session(), F.episode(),
                        M.total_pages(), M.full_pages(), M.hypnogram().pages(), ctime( &dima));
 
                if ( pz - pa != (int)M.full_pages() ) {
@@ -185,7 +185,7 @@ CProfile (CRecording& M,
                pz = (size_t)difftime( M.F().end_time(), _0at) / _pagesize;
        time_t  dima = M.F().start_time();
        printf( "CProfile::CProfile(): adding single recording %s of [%s, %s, %s] %zu pages (%zu full, %zu in hypnogram) recorded %s",
-               metrics::name(params.metric), M.F().subject(), M.F().session(), M.F().episode(),
+               metrics::name(params.metric), M.F().id.c_str(), M.F().session(), M.F().episode(),
                M.total_pages(), M.full_pages(), M.hypnogram().pages(), ctime( &dima));
 
        if ( pz - pa != (int)M.full_pages() ) {
diff --git a/src/expdesign/recording.hh b/src/expdesign/recording.hh
index a0997d3..e197099 100644
--- a/src/expdesign/recording.hh
+++ b/src/expdesign/recording.hh
@@ -153,7 +153,7 @@ class CRecording {
                    const metrics::mc::SPPack&);
        ~CRecording ();
 
-       const char* subject() const      {  return _source().subject(); }
+       const char* subject() const      {  return _source().name.c_str(); }
        const char* session() const      {  return _source().session(); }
        const char* episode() const      {  return _source().episode(); }
        const char* channel() const      {  return _source().channel_by_id(_sig_no); }
diff --git a/src/expdesign/tree-scanner.cc b/src/expdesign/tree-scanner.cc
index b1e9d23..e074779 100644
--- a/src/expdesign/tree-scanner.cc
+++ b/src/expdesign/tree-scanner.cc
@@ -151,16 +151,16 @@ register_intree_source( sigfile::CTypedSource&& F,
                }
 
                // refuse to register sources of wrong subjects
-               if ( j_name != F().subject() ) {
-                       log_message( "%s: file belongs to subject \"%s\", is misplaced here under subject \"%s\"\n",
-                                    F().filename(), F().subject(), j_name.c_str());
+               if ( j_name != F().id ) {
+                       log_message( "%s: file belongs to subject %s (\"%s\"), is misplaced here under subject \"%s\"\n",
+                                    F().filename(), F().id.c_str(), F().name.c_str(), j_name.c_str());
                        return -1;
                }
                try {
-                       auto existing_group = group_of( F().subject());
+                       auto existing_group = group_of( F().id.c_str());
                        if ( g_name != existing_group ) {
-                               log_message( "%s: subject \"%s\" belongs to a different group (\"%s\")\n",
-                                            F().filename(), F().subject(), existing_group);
+                               log_message( "%s: subject %s (\"%s\") belongs to a different group (\"%s\")\n",
+                                            F().filename(), F().id.c_str(), F().name.c_str(), existing_group);
                                return -1;
                        }
                } catch (invalid_argument) {
@@ -189,8 +189,8 @@ register_intree_source( sigfile::CTypedSource&& F,
                        J = &*Ji;
 
              // insert/update episode observing start/end times
-               printf( "\nCExpDesign::register_intree_source( file: \"%s\", J: \"%s\", E: \"%s\", D: \"%s\")\n",
-                       F().filename(), F().subject(), F().episode(), F().session());
+               printf( "\nCExpDesign::register_intree_source( file: \"%s\", J: %s (\"%s\"), E: \"%s\", D: \"%s\")\n",
+                       F().filename(), F().id.c_str(), F().name.c_str(), F().episode(), F().session());
                switch ( J->measurements[F().session()].add_one(
                                 move(F), fft_params, swu_params, mc_params) {  // this will do it
                case AGH_EPSEQADD_OVERLAP:
diff --git a/src/libsigfile/edf.cc b/src/libsigfile/edf.cc
index 21c63a8..123cab6 100644
--- a/src/libsigfile/edf.cc
+++ b/src/libsigfile/edf.cc
@@ -41,7 +41,7 @@ template int sigfile::CEDFFile::export_original_( const char*, const char*) cons
 
 int
 sigfile::CEDFFile::
-set_subject( const char* s)
+set_patient_id( const char* s)
 {
        memcpy( header.patient_id, agh::str::pad( s, 80).c_str(), 80);
        return strlen(s) > 80;
@@ -265,7 +265,7 @@ CEDFFile (const char *fname_, TSubtype subtype_, int flags_,
        _lay_out_header();
 
        strncpy( header.version_number, version_string, 8);
-       set_subject( "Mr. Fafa");
+       set_patient_id( "Fafa_1 M X Mr._Fafa");
        set_recording_id( "Zzz");
        set_comment( fname_);
        set_start_time( time(NULL));
@@ -365,9 +365,9 @@ CEDFFile (CEDFFile&& rv)
        _start_time = rv._start_time;
        _end_time   = rv._end_time;
 
-       swap( _patient, rv._patient);
-       swap( _episode, rv._episode);
-       swap( _session, rv._session);
+       swap( _patient_id, rv._patient_id);
+       swap( _episode,    rv._episode);
+       swap( _session,    rv._session);
 
        swap( channels, rv.channels);
 
@@ -512,6 +512,11 @@ _parse_header()
                _get_next_field( header.data_record_size, 8);
                _get_next_field( header.n_channels,       4);
 
+               if ( strncmp( header.version_number, version_string, 8) ) {
+                       _status |= (bad_version | inoperable);
+                       return -2;
+               }
+
                _subtype =
                        (strncasecmp( header.reserved, "edf+c", 5) == 0)
                        ? edfplus_c
@@ -519,11 +524,6 @@ _parse_header()
                        ? edfplus_d
                        : edf;
 
-               if ( strncmp( header.version_number, version_string, 8) ) {
-                       _status |= (bad_version | inoperable);
-                       return -2;
-               }
-
                size_t  header_length;
 
                header_length = n_data_records = data_record_size = n_channels = 0;
@@ -542,7 +542,23 @@ _parse_header()
                        return -2;
                }
 
-               _patient = agh::str::trim( string (header.patient_id, 80));
+               _patient_id = agh::str::trim( string (header.patient_id, 80));
+
+             // sub-parse patient_id into SSubjectId struct
+               {
+                       auto subfields = agh::str::tokens( _patient_id, " ");
+                       if ( subfields.size() != 4 ) {
+                               fprintf( stderr, "%s: Nonconforming patient_id\n", filename());
+                               SSubjectId::id = SSubjectId::name = subfields.front();
+                               SSubjectId::gender = TGender::unknown;
+                       } else {
+                               auto i = subfields.begin();
+                               SSubjectId::id = *i++;
+                               SSubjectId::gender = SSubjectId::char_to_gender((*i++)[0]);
+                               SSubjectId::dob = SSubjectId::str_to_dob(*i++);
+                               SSubjectId::name = agh::str::join( agh::str::tokens(*i++, "_"), " ");
+                       }
+               }
 
              // deal with episode and session
                {
@@ -780,7 +796,7 @@ sigfile::CEDFFile::details( bool channels_too) const
                               " Record length\t: %zu sec\n",
                               filename(),
                               subtype_s(),
-                              subject(),
+                              patient_id(),
                               agh::str::trim( string (header.recording_id, 80)).c_str(),
                               agh::str::trim( string (header.recording_date, 8)).c_str(),
                               agh::str::trim( string (header.recording_time, 8)).c_str(),
@@ -849,12 +865,11 @@ sigfile::CEDFFile::explain_edf_status( int status)
        if ( status & time_unparsable )
                recv.emplace_back( "* Time field ill-formed");
        if ( status & nosession )
-               recv.emplace_back(
-                       "* No session information in field RecordingID "
-                       "(expecting this to appear after "
-                       "episode designation followed by a comma)");
+               recv.emplace_back( "* No session information in field RecordingID");
        if ( status & non1020_channel )
                recv.emplace_back( "* Channel designation not following the 10-20 system");
+       if ( status & nonconforming_patient_id )
+               recv.emplace_back( "* PatientId not conforming to section 2.1.3.3 of EDF spec");
        if ( status & nonkemp_signaltype )
                recv.emplace_back( "* Signal type not listed in Kemp et al");
        if ( status & dup_channels )
diff --git a/src/libsigfile/edf.hh b/src/libsigfile/edf.hh
index e70c1d2..cd482e8 100644
--- a/src/libsigfile/edf.hh
+++ b/src/libsigfile/edf.hh
@@ -98,8 +98,8 @@ class CEDFFile
        // identification
        const char* filename() const
                { return _filename.c_str(); }
-       const char* subject() const
-               { return _patient.c_str(); }
+       const char* patient_id() const
+               { return _patient_id.c_str(); }
        const char* recording_id() const
                { return header.recording_id; }
        const char* comment() const
@@ -118,7 +118,7 @@ class CEDFFile
                { return n_data_records * data_record_size; }
 
        // setters
-       int set_subject( const char* s);
+       int set_patient_id( const char* s);
        int set_recording_id( const char* s);
        int set_episode( const char* s);
        int set_session( const char* s);
@@ -484,22 +484,23 @@ class CEDFFile
 
 
        enum TStatus : int {
-               ok                      = 0,
-               bad_header              = (1 <<  0),
-               bad_version             = (1 <<  1),
-               bad_numfld              = (1 <<  2),
-               bad_recording           = (1 <<  3),
-               date_unparsable         = (1 <<  4),
-               time_unparsable         = (1 <<  5),
-               nosession               = (1 <<  6),
-               noepisode               = (1 <<  7),
-               nonkemp_signaltype      = (1 <<  8),
-               non1020_channel         = (1 <<  9),
-               dup_channels            = (1 << 11),
-               nogain                  = (1 << 12),
-               sysfail                 = (1 << 13),
-               too_many_channels       = (1 << 14),
-               inoperable              = (bad_header
+               ok                       = 0,
+               bad_header               = (1 <<  0),
+               bad_version              = (1 <<  1),
+               bad_numfld               = (1 <<  2),
+               bad_recording            = (1 <<  3),
+               date_unparsable          = (1 <<  4),
+               time_unparsable          = (1 <<  5),
+               nosession                = (1 <<  6),
+               noepisode                = (1 <<  7),
+               nonkemp_signaltype       = (1 <<  8),
+               non1020_channel          = (1 <<  9),
+               dup_channels             = (1 << 11),
+               nogain                   = (1 << 12),
+               sysfail                  = (1 << 13),
+               too_many_channels        = (1 << 14),
+               nonconforming_patient_id = (1 << 15),
+               inoperable               = (bad_header
                                           | bad_version
                                           | bad_numfld
                                           | bad_recording
@@ -517,7 +518,7 @@ class CEDFFile
        time_t  _start_time,
                _end_time;
 
-       string  _patient,
+       string  _patient_id, // this is trimmed, raw; parsed into SSubjectId fields
        // take care of file being named 'episode-1.edf'
                _episode,
        // loosely/possibly also use RecordingID as session
diff --git a/src/libsigfile/source-base.hh b/src/libsigfile/source-base.hh
index fc8ad43..45bed88 100644
--- a/src/libsigfile/source-base.hh
+++ b/src/libsigfile/source-base.hh
@@ -186,9 +186,81 @@ struct SFilterPack {
 
 
 
+// follow http://www.edfplus.info/specs/edfplus.html#datarecords, section 2.1.3.3
+struct SSubjectId {
+       string  id,
+               name;
+       time_t  dob;
+       enum class TGender : char {
+               unknown = 'X', male = 'M', female = 'F'
+       };
+       TGender gender;
+       static TGender char_to_gender( char x)
+               {
+                       switch ( x ) {
+                       case 'M':
+                       case 'm':
+                               return TGender::male;
+                       case 'F':
+                       case 'f':
+                               return TGender::female;
+                       default:
+                               return TGender::unknown;
+                       }
+               }
+       static int str_to_english_month( const string& s)
+               {
+                       if ( strcasecmp( s.c_str(), "jan") == 0 )
+                               return 0;
+                       if ( strcasecmp( s.c_str(), "feb") == 0 )
+                               return 1;
+                       if ( strcasecmp( s.c_str(), "mar") == 0 )
+                               return 2;
+                       if ( strcasecmp( s.c_str(), "apr") == 0 )
+                               return 3;
+                       if ( strcasecmp( s.c_str(), "may") == 0 )
+                               return 4;
+                       if ( strcasecmp( s.c_str(), "jun") == 0 )
+                               return 5;
+                       if ( strcasecmp( s.c_str(), "jul") == 0 )
+                               return 6;
+                       if ( strcasecmp( s.c_str(), "aug") == 0 )
+                               return 7;
+                       if ( strcasecmp( s.c_str(), "sep") == 0 )
+                               return 8;
+                       if ( strcasecmp( s.c_str(), "oct") == 0 )
+                               return 9;
+                       if ( strcasecmp( s.c_str(), "nov") == 0 )
+                               return 10;
+                       if ( strcasecmp( s.c_str(), "dec") == 0 )
+                               return 11;
+                       else
+                               return -1;
+               }
+       static time_t str_to_dob( const string& s)
+               {
+                       struct tm t;
+                       memset( &t, '\0', sizeof (t));
+
+                       // strptime( s, "%d-", &t); // will suck in non-US locales, so
+                       auto ff = agh::str::tokens(s, "-");
+                       if ( ff.size() != 3 )
+                               return (time_t)0;
+                       auto f = ff.begin();
+                       try {
+                               t.tm_mday = stoi( *f++);
+                               t.tm_mon  = str_to_english_month(*f++);
+                               t.tm_year = 1900 + stoi(*f);
+                               return mktime( &t);
+                       } catch (...) {
+                               return (time_t)0;
+                       }
+               }
+};
+
 
 
-class CSource {
+class CSource : public SSubjectId {
        friend class CTypedSource;
     protected:
        string  _filename;
@@ -216,7 +288,7 @@ class CSource {
                {
                        return _filename.c_str();
                }
-       virtual const char* subject()                   const = 0;
+       virtual const char* patient_id()                const = 0;
        virtual const char* recording_id()              const = 0;
        virtual const char* comment()                   const = 0;
        // probably parsed out of recording_id
@@ -280,7 +352,7 @@ class CSource {
                }
 
       // setters
-       virtual int set_subject( const char*)         = 0;
+       virtual int set_patient_id( const char*)      = 0;
        virtual int set_recording_id( const char*)    = 0;
        virtual int set_episode( const char*)         = 0;
        virtual int set_session( const char*)         = 0;
diff --git a/src/metrics/mc.cc b/src/metrics/mc.cc
index f4e5afc..26c5e5f 100644
--- a/src/metrics/mc.cc
+++ b/src/metrics/mc.cc
@@ -162,7 +162,7 @@ export_tsv( const string& fname) const
        fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
                 "## Total EEG Microcontinuity course (%zu %zu-sec pages) from %g up to %g Hz in bins of %g Hz\n"
                 "#Page\t",
-                _using_F().subject(), _using_F().session(), _using_F().episode(),
+                _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
                 (int)strlen(asctime_)-1, asctime_,
                 _using_F().channel_by_id(_using_sig_no),
                 pages(), Pp.pagesize, Pp.freq_from, Pp.freq_from + Pp.bandwidth * bins(), Pp.bandwidth);
@@ -196,7 +196,7 @@ export_tsv( size_t bin,
        fprintf( f, "## Microcontinuity profile of\n"
                 "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
                 "## Course (%zu %zu-sec pages) in range %g-%g Hz\n",
-                _using_F().subject(), _using_F().session(), _using_F().episode(),
+                _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
                 (int)strlen(asctime_)-1, asctime_,
                 _using_F().channel_by_id(_using_sig_no),
                 pages(), Pp.pagesize, Pp.freq_from, Pp.freq_from + (bin+1) * Pp.bandwidth);
diff --git a/src/metrics/page-metrics-base.cc b/src/metrics/page-metrics-base.cc
index d7aab1a..3064806 100644
--- a/src/metrics/page-metrics-base.cc
+++ b/src/metrics/page-metrics-base.cc
@@ -217,7 +217,7 @@ export_tsv( const string& fname) const
        char *asctime_ = asctime( localtime( &sttm));
        fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
                 "#Page\t",
-                _using_F().subject(), _using_F().session(), _using_F().episode(),
+                _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
                 (int)strlen(asctime_)-1, asctime_,
                 _using_F().channel_by_id(_using_sig_no));
 
diff --git a/src/metrics/psd.cc b/src/metrics/psd.cc
index 214b479..6137127 100644
--- a/src/metrics/psd.cc
+++ b/src/metrics/psd.cc
@@ -267,7 +267,7 @@ export_tsv( const string& fname) const
        fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
                 "## Total spectral power course (%zu %zu-sec pages) up to %g Hz in bins of %g Hz\n"
                 "#Page\t",
-                _using_F().subject(), _using_F().session(), _using_F().episode(),
+                _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
                 (int)strlen(asctime_)-1, asctime_,
                 _using_F().channel_by_id(_using_sig_no),
                 pages(), Pp.pagesize, _bins*Pp.binsize, Pp.binsize);
@@ -303,7 +303,7 @@ export_tsv( float from, float upto,
        fprintf( f, "PSD profile of\n"
                 "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
                 "## Course (%zu %zu-sec pages) in range %g-%g Hz\n",
-                _using_F().subject(), _using_F().session(), _using_F().episode(),
+                _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
                 (int)strlen(asctime_)-1, asctime_,
                 _using_F().channel_by_id(_using_sig_no),
                 pages(), Pp.pagesize, from, upto);
diff --git a/src/metrics/swu.cc b/src/metrics/swu.cc
index 36a1312..0c8cafd 100644
--- a/src/metrics/swu.cc
+++ b/src/metrics/swu.cc
@@ -155,7 +155,7 @@ export_tsv( const string& fname) const
        fprintf( f, "## Subject: %s;  Session: %s, Episode: %s recorded %.*s;  Channel: %s\n"
                 "## SWU course (%zu %zu-sec pages)\n"
                 "#Page\tSWU\n",
-                _using_F().subject(), _using_F().session(), _using_F().episode(),
+                _using_F().name.c_str(), _using_F().session(), _using_F().episode(),
                 (int)strlen(asctime_)-1, asctime_,
                 _using_F().channel_by_id(_using_sig_no),
                 pages(), Pp.pagesize);
diff --git a/src/tools/edfcat.cc b/src/tools/edfcat.cc
index 62b814e..108810f 100644
--- a/src/tools/edfcat.cc
+++ b/src/tools/edfcat.cc
@@ -390,7 +390,7 @@ exec_prune( const SOperation::SObject& obj)
                             F.data_record_size,
                             F.n_data_records);
 
-       G.set_subject( F.subject());
+       G.set_patient_id( F.patient_id());
        string tmp = F.recording_id();
        G.set_recording_id( tmp.c_str());
        tmp = F.comment();
diff --git a/src/ui/mw/admit-one.cc b/src/ui/mw/admit-one.cc
index db595fa..9f597b5 100644
--- a/src/ui/mw/admit-one.cc
+++ b/src/ui/mw/admit-one.cc
@@ -36,7 +36,7 @@ dnd_maybe_admit_one( const char* fname)
 
                snprintf_buf( "File: <i>%s</i>", fname);
                gtk_label_set_markup( lEdfImportCaption, __buf__);
-               snprintf_buf( "<b>%s</b>", (*Fp)().subject());
+               snprintf_buf( "<b>%s</b> (%s)", (*Fp)().id.c_str(), (*Fp)().name.c_str());
                gtk_label_set_markup( lEdfImportSubject, __buf__);
 
        } catch ( exception& ex) {
@@ -55,7 +55,7 @@ dnd_maybe_admit_one( const char* fname)
        try {
                gtk_entry_set_text(
                        eEdfImportGroupEntry,
-                       ED->group_of( (*Fp)().subject()));
+                       ED->group_of( (*Fp)().id.c_str()));
                gtk_widget_set_sensitive( (GtkWidget*)eEdfImportGroup, FALSE);
        } catch (invalid_argument ex) {
                for ( auto &i : AghGG ) {
@@ -109,7 +109,7 @@ dnd_maybe_admit_one( const char* fname)
                dest_path = g_strdup_printf( "%s/%s/%s/%s",
                                             ED->session_dir().c_str(),
                                             selected_group,
-                                            (*Fp)().subject(),
+                                            (*Fp)().id.c_str(),
                                             selected_session);
                dest = g_strdup_printf( "%s/%s.edf",
                                        dest_path,
diff --git a/src/ui/mw/construct.cc b/src/ui/mw/construct.cc
index 72d2658..ef0f36b 100644
--- a/src/ui/mw/construct.cc
+++ b/src/ui/mw/construct.cc
@@ -639,7 +639,6 @@ SExpDesignUIWidgets ()
                          (GCallback)gtk_tree_view_expand_all,
                          NULL);
        G_CONNECT_2 (tvGlobalAnnotations, row, activated);
-       FAFA;
 
        int c = 0;
        for ( auto column : {"Recording", "Page(s)", "Channel", "Type", "Label"} ) {

-- 
Sleep experiment manager

_______________________________________________
debian-med-commit mailing list
[email protected]
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit

Reply via email to