Author: david
Date: Tue Jan 10 09:44:31 2012
New Revision: 10609
Log:
Copy Mike's csv:import task for ISAAR data
Added:
trunk/lib/task/csvIsaarImportTask.class.php
- copied, changed from r10605, trunk/lib/task/csvImportTask.class.php
Copied and modified: trunk/lib/task/csvIsaarImportTask.class.php (from r10605,
trunk/lib/task/csvImportTask.class.php)
==============================================================================
--- trunk/lib/task/csvImportTask.class.php Tue Jan 10 00:03:13 2012
(r10605, copy source)
+++ trunk/lib/task/csvIsaarImportTask.class.php Tue Jan 10 09:44:31 2012
(r10609)
@@ -18,18 +18,44 @@
*/
/**
- * Import csv data
+ * Import ISAAR formatted csv data
*
* @package symfony
* @subpackage task
* @author Mike Cantelon <[email protected]>
+ * @author David Juhasz <[email protected]>
* @version SVN: $Id$
*/
-class csvImportTask extends sfBaseTask
+class csvIsaarImportTask extends sfBaseTask
{
protected static
$count = 0;
+ protected
+ $columns = array(
+ 'authorizedFormOfName',
+ 'datesOfExistence',
+ 'descriptionIdentifier',
+ 'entityType',
+ 'functions',
+ 'generalContext',
+ 'history',
+ 'identifier',
+ 'institutionIdentifier',
+ 'internalStructures',
+ 'languages',
+ 'legalStatus',
+ 'maintenanceNotes',
+ 'mandates',
+ 'otherNames',
+ 'parallelNames',
+ 'places',
+ 'rules',
+ 'scripts',
+ 'sources',
+ 'standardizedNames'
+ );
+
/**
* @see sfTask
*/
@@ -44,30 +70,14 @@
));
$this->namespace = 'csv';
- $this->name = 'import';
+ $this->name = 'isaar-import';
$this->briefDescription = 'Import csv data';
$this->detailedDescription = <<<EOF
-Import CSV data
+Import ISAAR formatted CSV data
EOF;
}
- protected function loadTaxonomyTerms($taxonomies)
- {
- $taxonomyTerms = array();
-
- foreach($taxonomies as $taxonomyId => $varName)
- {
- $taxonomyTerms[$varName] = array();
- foreach(QubitFlatfileImport::getTaxonomyTerms($taxonomyId) as $termId =>
$term)
- {
- $taxonomyTerms[$varName][$termId] = $term->name;
- }
- }
-
- return $taxonomyTerms;
- }
-
/**
* @see sfTask
*/
@@ -80,24 +90,12 @@
if (false === $fh = fopen($arguments['filename'], 'rb'))
{
- throw new sfException('You must specify a valid filename');
+ throw new sfException(sprintf('Could not open file "%s"',
$arguments['filename']));
}
$databaseManager = new sfDatabaseManager($this->configuration);
$conn = $databaseManager->getDatabase('propel')->getConnection();
- $defaultStatusId = sfConfig::get(
- 'app_defaultPubStatus',
- QubitTerm::PUBLICATION_STATUS_DRAFT_ID
- );
- $defaultStatusTypeId = QubitTerm::STATUS_TYPE_PUBLICATION_ID;
-
- // create note term if it doesn't yet exist
- QubitFlatfileImport::createOrFetchTerm(
- QubitTaxonomy::NOTE_TYPE_ID,
- 'Language note'
- );
-
// Load taxonomies into variables to avoid use of magic numbers
$termData = $this->loadTaxonomyTerms(array(
QubitTaxonomy::NOTE_TYPE_ID => 'noteTypes',
@@ -105,422 +103,31 @@
QubitTaxonomy::MATERIAL_TYPE_ID => 'materialTypes'
));
- // Define import
- $import = new QubitFlatfileImport(array(
- /* the status array is a place to put data that should be accessible
- from closure logic using the getStatus method */
- 'status' => array(
- 'options' => $options,
- 'sourceName' => basename($arguments['filename']),
- 'materialTypes' => $termData['materialTypes'],
- ),
- 'columnNames' => fgetcsv($fh, 60000), // 1st row supplies column
names/order
- 'defaultStatusId' => $defaultStatusId,
- 'defaultStatusTypeId' => $defaultStatusTypeId,
- 'ignoreColumns' => array(
- 'RECORD_ID',
- 'PRI_REC_NO',
- 'sort'
- ),
- /* import columns that map directory to QubitInformationObject
properties */
- 'standardColumns' => array(
- 'title',
- 'identifier',
- 'accruals',
- 'scopeAndContent',
- 'extentAndMedium',
- 'acquisition',
- 'accessConditions',
- 'locationOfCopies',
- 'locationOfOriginals',
- 'relatedUnitsOfDescription',
- 'edition',
- 'archivalHistory',
- 'arrangement',
- 'findingAids',
- 'sources',
- 'physicalCharacteristics',
- 'revisionHistory'
- ),
- /* import columns that should be redirected to QubitInformationObject
- properties (and optionally transformed)
-
- Example:
- 'columnMap' => array(
- 'Archival History' => 'archivalHistory',
- 'Revision history' => array(
- 'column' => 'revision',
- 'transformationLogic' => function(&$self, $text)
- {
- return $self->appendWithLineBreakIfNeeded(
- $self->informationObject->revision,
- $text
- );
- }
- )
- ),
- */
- 'columnMap' => array(
- 'physicalStorageLocation' => ' locationOfOriginals'
- ),
- /* import columns that can be added using the
- QubitInformationObject::addProperty method */
- 'propertyMap' => array(
- 'titleStatementOfResponsibility' => 'titleStatementOfResponsibility',
- 'radNoteStatementOfResponsibility' =>
'statementOfResponsibilityRelatingToPublishersSeries',
- 'titleProperOfPublishersSeries' => 'titleProperOfPublishersSeries',
- 'statementOfScaleArchitectural' => 'statementOfScaleArchitectural',
- 'statementOfScaleCartographic' => 'statementOfScaleCartographic',
- 'radTitleProperOfPublishersSeries' => 'titleProperOfPublishersSeries'
- ),
- /* import columns that can be added as QubitNote objects */
- 'noteMap' => array(
- 'languages' => array(
- 'typeId' => array_search('Language note', $termData['noteTypes'])
- ),
- 'radNoteConservation' => array(
- 'typeId' => array_search('Conservation note', $termData['noteTypes'])
- ),
- 'radNoteGeneral' => array(
- 'typeId' => array_search('General note', $termData['noteTypes'])
- ),
- 'radNoteSourceOfTitleProper' => array(
- 'typeId' => array_search('Source of title proper',
$termData['titleNoteTypes'])
- ),
- 'radTitleVariationsInTitle' => array(
- 'typeId' => array_search('Variations in title',
$termData['titleNoteTypes'])
- ),
- 'radTitleNoteContinuationOfTitle' => array(
- 'typeId' => array_search('Continuation of title',
$termData['titleNoteTypes'])
- ),
- 'radNoteAlphaNumericDesignation' => array(
- 'typeId' => 247,
- 'transformationLogic' => function(&$self, $text)
- {
- return 'Old Photo Number: '. $text;
- }
- )
- ),
- /* these values get stored to the rowStatusVars array */
- 'variableColumns' => array(
- 'UNIQUE_ID',
- 'PARENT_ID',
- 'datesOfCreation',
- 'copyrightStatus',
- 'copyrightExpires',
- 'copyrightHolder',
- 'datesOfCreationNote',
- 'datesOfCreationStart',
- 'datesOfCreationEnd'
- ),
- /* these values get exploded and stored to the rowStatusVars array */
- 'arrayColumns' => array(
- 'accessionNumber' => '|',
- 'creators' => '|',
- 'creatorHistory' => '|',
- 'subjectAccessPoints' => '|',
- 'placeAccessPoints' => '|',
- 'nameAccessPoints' => '|'
- ),
- 'rowInit' => function(&$self)
- {
- $self->informationObject = new QubitInformationObject;
- },
- 'rowComplete' => function(&$self)
- {
- // set to default status
- $self->informationObject->setStatus(array(
- 'statusId' => $self->defaultStatusId,
- 'typeId' => $self->defaultStatusTypeId
- ));
-
- if (!$self->rowStatusVars['PARENT_ID'])
- {
- $parentId = QubitInformationObject::ROOT_ID;
- } else {
- $query = "SELECT target_id FROM keymap WHERE source_id=? AND
source_name=?";
- $statement = $self->sqlQuery($query,
array($self->rowStatusVars['PARENT_ID'], $self->getStatus('sourceName')));
- if ($mapEntry = $statement->fetch(PDO::FETCH_OBJ))
- {
- $parentId = $mapEntry->target_id;
- } else {
- throw new sfException('Could not find parent '.
$self->rowStatusVars['PARENT_ID'] .'in key_map table');
- }
- }
-
- $self->informationObject->parentId = $parentId;
-
- if (!isset($self->testing) || !$self->testing)
- {
- $self->informationObject->save();
- }
-
- if (!$self->informationObject->id)
- {
- throw new sfException('Information object save failed');
- } else {
- // add keymap entry
- $keymap = new QubitKeymap;
- $keymap->sourceId = $self->rowStatusVars['UNIQUE_ID'];
- $keymap->sourceName = $self->getStatus('sourceName');
- $keymap->targetId = $self->informationObject->id;
- $keymap->targetName = 'Qubit';
- $keymap->save();
-
-#print $self->informationObject->levelOfDescriptionId ."\n";
-
- // add subject access points
- $accessPointColumns = array(
- 'subjectAccessPoints' => QubitTaxonomy::SUBJECT_ID,
- 'placeAccessPoints' => QubitTaxonomy::PLACE_ID,
- );
-
- foreach($accessPointColumns as $columnName => $taxonomyId)
- {
- if (isset($self->rowStatusVars[$columnName]))
- {
- foreach($self->rowStatusVars[$columnName] as $subject)
- {
- $self->createAccessPoint($taxonomyId, $subject);
- }
- }
- }
-
- // add name access points
- if (isset($self->rowStatusVars['nameAccessPoints']))
- {
- // add name access points
- foreach($self->rowStatusVars['nameAccessPoints'] as $name)
- {
- $actor = $self->createOrFetchActor($name);
- $relation = new QubitRelation;
- $relation->subjectId = $self->informationObject->id;
- $relation->objectId = $actor->id;
- $relation->typeId = QubitTerm::NAME_ACCESS_POINT_ID;
- $relation->save();
- }
- }
-
- // add accessions
- if (
- isset($self->rowStatusVars['accessionNumber'])
- && count($self->rowStatusVars['accessionNumber'])
- )
- {
- foreach($self->rowStatusVars['accessionNumber'] as
$accessionNumber)
- {
- $accession = new QubitAccession;
- $accession->save();
-
- // workaround to problem setting identifier
- $query = "UPDATE accession SET identifier=? WHERE id=?";
- $self->sqlQuery($query, $params = array($accessionNumber,
$accession->id));
- }
- }
-
- // add material-related term relation
- if (isset($self->rowStatusVars['radGeneralMaterialDesignation']))
- {
- $self->createObjectTermRelation(
- $self->informationObject->id,
- $self->rowStatusVars['radGeneralMaterialDesignation']
- );
- }
-
- // add copyright info
- if (isset($self->rowStatusVars['copyrightStatus']) &&
$self->rowStatusVars['copyrightStatus'])
- {
- switch (strtolower($self->rowStatusVars['copyrightStatus']))
- {
- case 'under copyright':
- print "Adding rights for ". $self->informationObject->title
."...\n";
- if ($self->rowStatusVars['copyrightHolder'])
- {
- // add rightsholder
- $actor = new QubitRightsHolder;
- $actor->parentId = QubitActor::ROOT_ID;
- $actor->authorizedFormOfName =
$self->rowStatusVars['copyrightHolder'];
- $actor->save();
-
- $self->createRightAndRelation(array(
- 'rightsHolderId' => $actor->id,
- 'restriction' => 1,
- 'basisId' => 170,
- 'actId' => 305,
- 'copyrightStatusId' => 306
- ));
- } else {
- throw new sfException('Copyright holder not specified');
- }
- break;
-
- case 'public domain':
- $self->createRightAndRelation(array(
- 'restriction' => 1,
- 'basisId' => 170,
- 'actId' => 305,
- 'copyrightStatusId' => 307
- ));
- break;
-
- default:
- throw new sfException('Copyright status "'
- . $self->rowStatusVars['copyrightStatus']
- .'" not handled: adjust script or import data');
- break;
- }
- }
-
- // add creators and create events
- if (isset($self->rowStatusVars['creators'])
- && count($self->rowStatusVars['creators']))
- {
- foreach($self->rowStatusVars['creators'] as $creator)
- {
- // add create event if specified
- if (
- isset($self->rowStatusVars['datesOfCreationStart'])
- || (
- isset($self->rowStatusVars['datesOfCreationStart'])
- && isset($self->rowStatusVars['datesOfCreationEnd'])
- )
- )
- {
- $endDate = (isset($self->rowStatusVars['datesOfCreationEnd']))
- ? $self->rowStatusVars['datesOfCreationEnd'] .'-00-00'
- : false;
-
- $eventData = array(
- 'startDate' => $self->rowStatusVars['datesOfCreationStart']
.'-00-00',
- 'endDate' => $endDate,
- 'actorName' => $creator
- );
-
- foreach(array(
- 'datesOfCreationNote' => 'description',
- 'datesOfCreation' => 'date'
- )
- as $statusVar => $eventProperty
- )
- {
- $eventData[$eventProperty] =
$self->rowStatusVars[$statusVar];
- }
-
- if(isset($self->rowStatusVars['creatorHistory']))
- {
- $creatorPosition = array_search($creator,
$self->rowStatusVars['creators']);
- if
(isset($self->rowStatusVars['creatorHistory'][$creatorPosition]))
- {
- $eventData['actorHistory'] =
$self->rowStatusVars['creatorHistory'][$creatorPosition];
- }
- }
-
- $event = $self->createEvent(
- QubitTerm::CREATION_ID,
- $eventData
- );
- }
- }
- }
-
- // if a role is found, create term and actor if need be
- if (isset($self->rowStatusVars['actorRoles']))
- {
- foreach($self->rowStatusVars['actorRoles'] as $actorRole)
- {
- // create/fetch term
- $term = $self->createOrFetchTerm(
- QubitTaxonomy::EVENT_TYPE_ID,
- $actorRole['role']
- );
-
- // create/fetch actor
- $self->createOrFetchActor($actorRole['actor']);
-
- $self->createEvent($term->id, array('actorName' =>
$actorRole['actor']));
- }
- }
-
- // output progress information
- if ($options = $self->getStatus('options'))
- {
- // row count isn't incremented until completion of this closure,
so add one to reflect reality
- $rowsProcessed = $self->getStatus('rows') + 1;
- $memoryUsageMB = round(memory_get_usage() / (1024 * 1024), 2);
- if ($options['rows-until-update'] && !($rowsProcessed %
$options['rows-until-update']))
- {
- $elapsed = $self->getTimeElapsed();
- $elapsedMinutes = round($elapsed / 60, 2);
- $averageTime = round($elapsed / $rowsProcessed, 2);
-
- print "\n". $rowsProcessed ." rows processed in ".
$elapsedMinutes
- . " minutes (". $averageTime ." second/row average, ".
$memoryUsageMB ." MB used).\n";
- }
- }
- }
- }
- ));
+ //$isaarCsv = new qtIsaarCsv;
- $import->addColumnHandler('levelOfDescription', function(&$self, $data)
- {
- $self->informationObject->setLevelOfDescriptionByName($data);
- });
+ // First row is header
+ $header = fgetcsv($fh);
- $relatedActorHandler = function(&$self, $data)
- {
- if ($data)
- {
- // parse out actor role from column name
- $pattern = '/^relatedActor(.*)By$|^relatedActor(.*)$/';
- preg_match($pattern, $self->status['currentColumn'], $matches);
- $termName = (isset($matches[1]) && $matches[1] != '') ? $matches[1] :
'';
- $termName = (isset($matches[2]) && $matches[2] != '') ? $matches[2] :
$termName;
-
- // note that role and actor should be created after saving info object
- $self->rowStatusVars['actorRoles'] =
(isset($self->rowStatusVars['actorRoles']))
- ? $self->rowStatusVars['actorRoles']
- : array();
-
- array_push(
- $self->rowStatusVars['actorRoles'],
- array('role' => $termName, 'actor' => $data)
- );
- }
- };
+ // camelCase header values
+ $header = array_map(array('QubitFlatFileImport', 'camelize'), $header);
+
+ var_dump($header);
+ die();
+ }
- $import->addColumnHandler('relatedActorCommissionedBy',
$relatedActorHandler);
- $import->addColumnHandler('relatedActorPhotographer',
$relatedActorHandler);
+ protected function loadTaxonomyTerms($taxonomies)
+ {
+ $taxonomyTerms = array();
- $import->addColumnHandler('radGeneralMaterialDesignation',
function(&$self, $data)
+ foreach($taxonomies as $taxonomyId => $varName)
{
- if ($data)
+ $taxonomyTerms[$varName] = array();
+ foreach(QubitFlatfileImport::getTaxonomyTerms($taxonomyId) as $termId =>
$term)
{
- $cvaToQubit = array(
- 'Text' => 'Textual record',
- 'Photograph' => 'Graphic material',
- 'ArchitecturalPlan' => 'Architectural drawing',
- 'Audio' => 'Sound recording',
- 'MovingImage' => 'Moving images',
- 'Map' => 'Cartographic material'
- );
-
- if (isset($cvaToQubit[$data]))
- {
- $materialTermName = $cvaToQubit[$data];
- if (in_array($materialTermName, $self->getStatus('materialTypes')))
- {
- $termId = array_search($materialTermName,
$self->getStatus('materialTypes'));
- // lookup material taxonomy ID from materialTypes
- $self->rowStatusVars['radGeneralMaterialDesignation'] = $termId;
- } else {
- die('Could not find "'. $materialTermName .'" in materialTypes
array.');
- }
- } else {
- die('Could not find a way to handle radGeneralMaterialDesignation
value "'. $data .'".');
- }
+ $taxonomyTerms[$varName][$termId] = $term->name;
}
- });
+ }
- $import->csv($fh);
+ return $taxonomyTerms;
}
}
--
You received this message because you are subscribed to the Google Groups
"Qubit Toolkit Commits" group.
To post to this group, send email to qubit-commits@googlegroups.com.
To unsubscribe from this group, send email to
qubit-commits+unsubscribe@googlegroups.com.
For more options, visit this group at
http://groups.google.com/group/qubit-commits?hl=en.