Author: david
Date: Tue Jan 10 09:44:31 2012
New Revision: 10609

Log:
Copy Mike's csv:import task for ISAAR data

Added:
   trunk/lib/task/csvIsaarImportTask.class.php
      - copied, changed from r10605, trunk/lib/task/csvImportTask.class.php

Copied and modified: trunk/lib/task/csvIsaarImportTask.class.php (from r10605, trunk/lib/task/csvImportTask.class.php)
==============================================================================
--- trunk/lib/task/csvImportTask.class.php      Tue Jan 10 00:03:13 2012        (r10605, copy source)
+++ trunk/lib/task/csvIsaarImportTask.class.php Tue Jan 10 09:44:31 2012        (r10609)
@@ -18,18 +18,44 @@
  */
 
 /**
- * Import csv data
+ * Import ISAAR formatted csv data
  *
  * @package    symfony
  * @subpackage task
  * @author     Mike Cantelon <[email protected]>
+ * @author     David Juhasz <[email protected]>
  * @version    SVN: $Id$
  */
-class csvImportTask extends sfBaseTask
+class csvIsaarImportTask extends sfBaseTask
 {
   protected static
     $count = 0;
 
+  protected
+    $columns = array(
+      'authorizedFormOfName',
+      'datesOfExistence',
+      'descriptionIdentifier',
+      'entityType',
+      'functions',
+      'generalContext',
+      'history',
+      'identifier',
+      'institutionIdentifier',
+      'internalStructures',
+      'languages',
+      'legalStatus',
+      'maintenanceNotes',
+      'mandates',
+      'otherNames',
+      'parallelNames',
+      'places',
+      'rules',
+      'scripts',
+      'sources',
+      'standardizedNames'
+    );
+
   /**
    * @see sfTask
    */
@@ -44,30 +70,14 @@
     ));
 
     $this->namespace = 'csv';
-    $this->name = 'import';
+    $this->name = 'isaar-import';
     $this->briefDescription = 'Import csv data';
 
     $this->detailedDescription = <<<EOF
-Import CSV data
+Import ISAAR formatted CSV data
 EOF;
   }
 
-  protected function loadTaxonomyTerms($taxonomies)
-  {
-    $taxonomyTerms = array();
-
-    foreach($taxonomies as $taxonomyId => $varName)
-    {
-      $taxonomyTerms[$varName] = array();
-      foreach(QubitFlatfileImport::getTaxonomyTerms($taxonomyId) as $termId => 
$term)
-      {
-        $taxonomyTerms[$varName][$termId] = $term->name;
-      }
-    }
-
-    return $taxonomyTerms;
-  }
-
   /**
    * @see sfTask
    */
@@ -80,24 +90,12 @@
 
     if (false === $fh = fopen($arguments['filename'], 'rb'))
     {
-      throw new sfException('You must specify a valid filename');
+      throw new sfException(sprintf('Could not open file "%s"', 
$arguments['filename']));
     }
 
     $databaseManager = new sfDatabaseManager($this->configuration);
     $conn = $databaseManager->getDatabase('propel')->getConnection();
 
-    $defaultStatusId = sfConfig::get(
-      'app_defaultPubStatus',
-      QubitTerm::PUBLICATION_STATUS_DRAFT_ID
-    );
-    $defaultStatusTypeId = QubitTerm::STATUS_TYPE_PUBLICATION_ID;
-
-    // create note term if it doesn't yet exist
-    QubitFlatfileImport::createOrFetchTerm(
-      QubitTaxonomy::NOTE_TYPE_ID,
-      'Language note'
-    );
-
     // Load taxonomies into variables to avoid use of magic numbers
     $termData = $this->loadTaxonomyTerms(array(
       QubitTaxonomy::NOTE_TYPE_ID      => 'noteTypes',
@@ -105,422 +103,31 @@
       QubitTaxonomy::MATERIAL_TYPE_ID  => 'materialTypes'
     ));
 
-    // Define import
-    $import = new QubitFlatfileImport(array(
-      /* the status array is a place to put data that should be accessible
-         from closure logic using the getStatus method */
-      'status' => array(
-        'options'       => $options,
-        'sourceName'    => basename($arguments['filename']),
-        'materialTypes' => $termData['materialTypes'],
-      ),
-      'columnNames' => fgetcsv($fh, 60000), // 1st row supplies column 
names/order
-      'defaultStatusId' => $defaultStatusId,
-      'defaultStatusTypeId' => $defaultStatusTypeId,
-      'ignoreColumns' => array(
-        'RECORD_ID',
-        'PRI_REC_NO',
-        'sort'
-      ),
-      /* import columns that map directory to QubitInformationObject 
properties */
-      'standardColumns' => array(
-        'title',
-        'identifier',
-        'accruals',
-        'scopeAndContent',
-        'extentAndMedium',
-        'acquisition',
-        'accessConditions',
-        'locationOfCopies',
-        'locationOfOriginals',
-        'relatedUnitsOfDescription',
-        'edition',
-        'archivalHistory',
-        'arrangement',
-        'findingAids',
-        'sources',
-        'physicalCharacteristics',
-        'revisionHistory'
-      ),
-      /* import columns that should be redirected to QubitInformationObject
-         properties (and optionally transformed)
-      
-         Example:
-         'columnMap' => array(
-           'Archival History' => 'archivalHistory',
-           'Revision history' => array(
-             'column' => 'revision',
-             'transformationLogic' => function(&$self, $text)
-             {
-               return $self->appendWithLineBreakIfNeeded(
-                 $self->informationObject->revision,
-                 $text
-               );
-             }
-           )
-         ),
-      */
-      'columnMap' => array(
-        'physicalStorageLocation' => ' locationOfOriginals'
-      ),
-      /* import columns that can be added using the
-         QubitInformationObject::addProperty method */
-      'propertyMap' => array(
-        'titleStatementOfResponsibility' => 'titleStatementOfResponsibility',
-        'radNoteStatementOfResponsibility' => 
'statementOfResponsibilityRelatingToPublishersSeries',
-        'titleProperOfPublishersSeries' => 'titleProperOfPublishersSeries',
-        'statementOfScaleArchitectural' => 'statementOfScaleArchitectural',
-        'statementOfScaleCartographic' => 'statementOfScaleCartographic',
-        'radTitleProperOfPublishersSeries' => 'titleProperOfPublishersSeries'
-      ),
-      /* import columns that can be added as QubitNote objects */
-      'noteMap' => array(
-        'languages' => array(
-          'typeId' => array_search('Language note', $termData['noteTypes'])
-        ),
-        'radNoteConservation' => array(
-          'typeId' => array_search('Conservation note', $termData['noteTypes'])
-        ),
-        'radNoteGeneral' => array(
-          'typeId' => array_search('General note', $termData['noteTypes'])
-        ),
-        'radNoteSourceOfTitleProper' => array(
-          'typeId' => array_search('Source of title proper', 
$termData['titleNoteTypes'])
-        ),
-        'radTitleVariationsInTitle' => array(
-          'typeId' => array_search('Variations in title', 
$termData['titleNoteTypes'])
-        ),
-        'radTitleNoteContinuationOfTitle' => array(
-          'typeId' => array_search('Continuation of title', 
$termData['titleNoteTypes'])
-        ),
-        'radNoteAlphaNumericDesignation' => array(
-          'typeId' => 247,
-          'transformationLogic' => function(&$self, $text)
-          {
-             return 'Old Photo Number: '. $text;
-          }
-        )
-      ),
-      /* these values get stored to the rowStatusVars array */
-      'variableColumns' => array(
-        'UNIQUE_ID',
-        'PARENT_ID',
-        'datesOfCreation',
-        'copyrightStatus',
-        'copyrightExpires',
-        'copyrightHolder',
-        'datesOfCreationNote',
-        'datesOfCreationStart',
-        'datesOfCreationEnd'
-      ),
-      /* these values get exploded and stored to the rowStatusVars array */
-      'arrayColumns' => array(
-        'accessionNumber'     => '|',
-        'creators'            => '|',
-        'creatorHistory'      => '|',
-        'subjectAccessPoints' => '|',
-        'placeAccessPoints'   => '|',
-        'nameAccessPoints'    => '|'
-      ),
-      'rowInit' => function(&$self)
-      {
-        $self->informationObject = new QubitInformationObject;
-      },
-      'rowComplete' => function(&$self)
-      {
-        // set to default status
-        $self->informationObject->setStatus(array(
-          'statusId' => $self->defaultStatusId,
-          'typeId'   => $self->defaultStatusTypeId
-        ));
-
-        if (!$self->rowStatusVars['PARENT_ID'])
-        {
-          $parentId = QubitInformationObject::ROOT_ID;
-        } else {
-          $query = "SELECT target_id FROM keymap WHERE source_id=? AND 
source_name=?";
-          $statement = $self->sqlQuery($query, 
array($self->rowStatusVars['PARENT_ID'], $self->getStatus('sourceName')));
-          if ($mapEntry = $statement->fetch(PDO::FETCH_OBJ))
-          {
-            $parentId = $mapEntry->target_id;
-          } else {
-            throw new sfException('Could not find parent '. 
$self->rowStatusVars['PARENT_ID'] .'in key_map table');
-          }
-        }
-
-        $self->informationObject->parentId = $parentId;
-
-        if (!isset($self->testing) || !$self->testing)
-        {
-          $self->informationObject->save();
-        }
-
-        if (!$self->informationObject->id)
-        {
-          throw new sfException('Information object save failed');
-        } else {
-          // add keymap entry
-          $keymap = new QubitKeymap;
-          $keymap->sourceId   = $self->rowStatusVars['UNIQUE_ID'];
-          $keymap->sourceName = $self->getStatus('sourceName');
-          $keymap->targetId   = $self->informationObject->id;
-          $keymap->targetName = 'Qubit';
-          $keymap->save();
-
-#print $self->informationObject->levelOfDescriptionId ."\n";
-
-          // add subject access points
-          $accessPointColumns = array(
-            'subjectAccessPoints' => QubitTaxonomy::SUBJECT_ID,
-            'placeAccessPoints'   => QubitTaxonomy::PLACE_ID,
-          );
-
-          foreach($accessPointColumns as $columnName => $taxonomyId)
-          {
-            if (isset($self->rowStatusVars[$columnName]))
-            {
-              foreach($self->rowStatusVars[$columnName] as $subject)
-              {
-                $self->createAccessPoint($taxonomyId, $subject);
-              }
-            }
-          }
-
-          // add name access points
-          if (isset($self->rowStatusVars['nameAccessPoints']))
-          {
-            // add name access points
-            foreach($self->rowStatusVars['nameAccessPoints'] as $name)
-            {
-              $actor = $self->createOrFetchActor($name);
-              $relation = new QubitRelation;
-              $relation->subjectId = $self->informationObject->id;
-              $relation->objectId = $actor->id;
-              $relation->typeId = QubitTerm::NAME_ACCESS_POINT_ID;
-              $relation->save();
-            }
-          }
-
-          // add accessions
-          if (
-            isset($self->rowStatusVars['accessionNumber'])
-            && count($self->rowStatusVars['accessionNumber'])
-          )
-          {
-            foreach($self->rowStatusVars['accessionNumber'] as 
$accessionNumber)
-            {
-              $accession = new QubitAccession;
-              $accession->save();
-
-              // workaround to problem setting identifier
-              $query = "UPDATE accession SET identifier=? WHERE id=?";
-              $self->sqlQuery($query, $params = array($accessionNumber, 
$accession->id));
-            }
-          }
-
-          // add material-related term relation
-          if (isset($self->rowStatusVars['radGeneralMaterialDesignation']))
-          {
-            $self->createObjectTermRelation(
-              $self->informationObject->id,
-              $self->rowStatusVars['radGeneralMaterialDesignation']
-            );
-          }
-
-          // add copyright info
-          if (isset($self->rowStatusVars['copyrightStatus']) && 
$self->rowStatusVars['copyrightStatus'])
-          {
-            switch (strtolower($self->rowStatusVars['copyrightStatus']))
-            {
-              case 'under copyright':
-                print "Adding rights for ". $self->informationObject->title 
."...\n";
-                if ($self->rowStatusVars['copyrightHolder'])
-                {
-                  // add rightsholder
-                  $actor = new QubitRightsHolder;
-                  $actor->parentId = QubitActor::ROOT_ID;
-                  $actor->authorizedFormOfName = 
$self->rowStatusVars['copyrightHolder'];
-                  $actor->save();
-
-                  $self->createRightAndRelation(array(
-                    'rightsHolderId'    => $actor->id,
-                    'restriction'       => 1,
-                    'basisId'           => 170,
-                    'actId'             => 305,
-                    'copyrightStatusId' => 306
-                  ));
-                } else {
-                  throw new sfException('Copyright holder not specified');
-                }
-                break;
-
-              case 'public domain':
-                $self->createRightAndRelation(array(
-                  'restriction'       => 1,
-                  'basisId'           => 170,
-                  'actId'             => 305,
-                  'copyrightStatusId' => 307
-                ));
-                break;
-
-              default:
-                throw new sfException('Copyright status "'
-                  . $self->rowStatusVars['copyrightStatus']
-                  .'" not handled: adjust script or import data');
-                break;
-            }
-          }
-
-          // add creators and create events
-          if (isset($self->rowStatusVars['creators'])
-            && count($self->rowStatusVars['creators']))
-          {
-            foreach($self->rowStatusVars['creators'] as $creator)
-            {
-              // add create event if specified
-              if (
-                isset($self->rowStatusVars['datesOfCreationStart'])
-                || (
-                  isset($self->rowStatusVars['datesOfCreationStart'])
-                  && isset($self->rowStatusVars['datesOfCreationEnd'])
-                )
-              )
-              {
-                $endDate = (isset($self->rowStatusVars['datesOfCreationEnd']))
-                  ? $self->rowStatusVars['datesOfCreationEnd'] .'-00-00'
-                  : false;
-
-                $eventData = array(
-                  'startDate' => $self->rowStatusVars['datesOfCreationStart'] 
.'-00-00',
-                  'endDate'   => $endDate,
-                  'actorName' => $creator
-                );
-
-                foreach(array(
-                    'datesOfCreationNote' => 'description',
-                    'datesOfCreation'     => 'date'
-                  )
-                  as $statusVar => $eventProperty
-                )
-                {
-                  $eventData[$eventProperty] = 
$self->rowStatusVars[$statusVar];
-                }
-
-                if(isset($self->rowStatusVars['creatorHistory']))
-                {
-                  $creatorPosition = array_search($creator, 
$self->rowStatusVars['creators']);
-                  if 
(isset($self->rowStatusVars['creatorHistory'][$creatorPosition]))
-                  {
-                    $eventData['actorHistory'] = 
$self->rowStatusVars['creatorHistory'][$creatorPosition];
-                  }
-                }
-
-                $event = $self->createEvent(
-                  QubitTerm::CREATION_ID,
-                  $eventData
-                );
-              }
-            }
-          }
-
-          // if a role is found, create term and actor if need be
-          if (isset($self->rowStatusVars['actorRoles']))
-          {
-            foreach($self->rowStatusVars['actorRoles'] as $actorRole)
-            {
-              // create/fetch term
-              $term = $self->createOrFetchTerm(
-                QubitTaxonomy::EVENT_TYPE_ID,
-                $actorRole['role']
-              );
-
-              // create/fetch actor
-              $self->createOrFetchActor($actorRole['actor']);
-
-              $self->createEvent($term->id, array('actorName' => 
$actorRole['actor']));
-            }
-          }
-
-          // output progress information
-          if ($options = $self->getStatus('options'))
-          {
-            // row count isn't incremented until completion of this closure, 
so add one to reflect reality
-            $rowsProcessed = $self->getStatus('rows') + 1;
-            $memoryUsageMB = round(memory_get_usage() / (1024 * 1024), 2);
-            if ($options['rows-until-update'] && !($rowsProcessed % 
$options['rows-until-update']))
-            {
-              $elapsed = $self->getTimeElapsed();
-              $elapsedMinutes = round($elapsed / 60, 2);
-              $averageTime = round($elapsed / $rowsProcessed, 2);
-
-              print "\n". $rowsProcessed ." rows processed in ". 
$elapsedMinutes
-                . " minutes (". $averageTime ." second/row average, ". 
$memoryUsageMB ." MB used).\n";
-            }
-          }
-        }
-      }
-    ));
+    //$isaarCsv = new qtIsaarCsv;
 
-    $import->addColumnHandler('levelOfDescription', function(&$self, $data)
-    {
-      $self->informationObject->setLevelOfDescriptionByName($data);
-    });
+    // First row is header
+    $header = fgetcsv($fh);
 
-    $relatedActorHandler = function(&$self, $data)
-    {
-      if ($data)
-      {
-        // parse out actor role from column name
-        $pattern = '/^relatedActor(.*)By$|^relatedActor(.*)$/';
-        preg_match($pattern, $self->status['currentColumn'], $matches);
-        $termName = (isset($matches[1]) && $matches[1] != '') ? $matches[1] : 
'';
-        $termName = (isset($matches[2]) && $matches[2] != '') ? $matches[2] : 
$termName;
-
-        // note that role and actor should be created after saving info object
-        $self->rowStatusVars['actorRoles'] = 
(isset($self->rowStatusVars['actorRoles']))
-          ? $self->rowStatusVars['actorRoles']
-          : array();
-
-        array_push(
-          $self->rowStatusVars['actorRoles'],
-          array('role' => $termName, 'actor' => $data)
-        );
-      }
-    };
+    // camelCase header values
+    $header = array_map(array('QubitFlatFileImport', 'camelize'), $header);
+
+    var_dump($header);
+    die();
+  }
 
-    $import->addColumnHandler('relatedActorCommissionedBy', 
$relatedActorHandler);
-    $import->addColumnHandler('relatedActorPhotographer', 
$relatedActorHandler);
+  protected function loadTaxonomyTerms($taxonomies)
+  {
+    $taxonomyTerms = array();
 
-    $import->addColumnHandler('radGeneralMaterialDesignation', 
function(&$self, $data)
+    foreach($taxonomies as $taxonomyId => $varName)
     {
-      if ($data)
+      $taxonomyTerms[$varName] = array();
+      foreach(QubitFlatfileImport::getTaxonomyTerms($taxonomyId) as $termId => 
$term)
       {
-        $cvaToQubit = array(
-          'Text'              => 'Textual record',
-          'Photograph'        => 'Graphic material',
-          'ArchitecturalPlan' => 'Architectural drawing',
-          'Audio'             => 'Sound recording',
-          'MovingImage'       => 'Moving images',
-          'Map'               => 'Cartographic material'
-        );
-
-        if (isset($cvaToQubit[$data]))
-        {
-          $materialTermName = $cvaToQubit[$data];
-          if (in_array($materialTermName, $self->getStatus('materialTypes')))
-          {
-            $termId = array_search($materialTermName, 
$self->getStatus('materialTypes'));
-            // lookup material taxonomy ID from materialTypes
-            $self->rowStatusVars['radGeneralMaterialDesignation'] = $termId;
-          } else {
-            die('Could not find "'. $materialTermName .'" in materialTypes 
array.');
-          }
-        } else {
-          die('Could not find a way to handle radGeneralMaterialDesignation 
value "'. $data .'".');
-        }
+        $taxonomyTerms[$varName][$termId] = $term->name;
       }
-    });
+    }
 
-    $import->csv($fh);
+    return $taxonomyTerms;
   }
 }

-- 
You received this message because you are subscribed to the Google Groups 
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/qubit-commits?hl=en.

Reply via email to