Author: david
Date: Thu Dec 8 14:33:47 2011
New Revision: 10380
Log:
Copy and modify lib/task/digitalObjectLoadTask.class.php for use with
Archivematica DIPs
Added:
trunk/lib/task/archivematicaDipImportTask.class.php (contents, props
changed)
Modified:
trunk/lib/QubitSearch.class.php
Modified: trunk/lib/QubitSearch.class.php
==============================================================================
--- trunk/lib/QubitSearch.class.php Thu Dec 8 13:26:13 2011 (r10379)
+++ trunk/lib/QubitSearch.class.php Thu Dec 8 14:33:47 2011 (r10380)
@@ -21,7 +21,7 @@
{
// allow disabling search index via boolean flag
- public $disabled = false;
+ public $disabled = true;
/*
* Enable singleton creation via getInstance()
Added: trunk/lib/task/archivematicaDipImportTask.class.php
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ trunk/lib/task/archivematicaDipImportTask.class.php Thu Dec 8 14:33:47
2011 (r10380)
@@ -0,0 +1,197 @@
+<?php
+
+/*
+ * This file is part of Qubit Toolkit.
+ *
+ * Qubit Toolkit is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Qubit Toolkit is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Qubit Toolkit. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Process a CSV file to link digital objects from an Archivematica DIP to
+ * existing information objects in ICA-AtoM
+ *
+ * @package symfony
+ * @subpackage task
+ * @author David Juhasz <[email protected]>
+ * @version SVN: $Id$
+ */
+class archivematicaDipImportTask extends sfBaseTask
+{
+  // Number of digital objects saved by addDigitalObject() during this run
+  protected static
+    $count = 0;
+
+  /**
+   * Declare the task's argument, options, name and help text.
+   *
+   * @see sfTask
+   */
+  protected function configure()
+  {
+    $this->addArguments(array(
+      new sfCommandArgument('filename', sfCommandArgument::REQUIRED, 'The input file (csv format).')
+    ));
+
+    $this->addOptions(array(
+      new sfCommandOption('application', null, sfCommandOption::PARAMETER_OPTIONAL, 'The application name', true),
+      new sfCommandOption('env', null, sfCommandOption::PARAMETER_REQUIRED, 'The environment', 'cli'),
+      new sfCommandOption('connection', null, sfCommandOption::PARAMETER_REQUIRED, 'The connection name', 'propel'),
+      new sfCommandOption('path', 'p', sfCommandOption::PARAMETER_OPTIONAL, 'Path to digital objects. If a path is not specified then csv filenames are assumed to include an absolute path', null),
+    ));
+
+    $this->namespace = 'archivematica';
+    $this->name = 'dip-import';
+    $this->briefDescription = 'Process a CSV file to link digital objects from an Archivematica DIP to existing information objects in ICA-AtoM';
+
+    $this->detailedDescription = <<<EOF
+Process a CSV file to link digital objects from an Archivematica DIP to existing information objects in ICA-AtoM.
+
+The CSV file must contain two columns, in the following order:
+1) The identifier of the information object
+2) The filename (TIFF) of the original digital object
+
+The import script expects that Archivematica will have modified the file name so:
+a) The file has been converted to a JPEG and uses the ".jpg" extension
+b) A UUID has been pre-pended to the filename
+
+Note: Filenames must be unique (without UUID) within "path" to avoid colliding on import.
+EOF;
+  }
+
+  /**
+   * Read the CSV named by the "filename" argument and attach each listed
+   * file to its information object as a master digital object.
+   *
+   * The first CSV row is always consumed as a header. If it names both
+   * "information_object_id" and "filename" those columns are used; otherwise
+   * column 0 is taken as the id and column 1 as the filename.
+   *
+   * NOTE(review): the help text describes a UUID prefix and ".jpg" extension
+   * applied by Archivematica, but no such filename rewriting is done here;
+   * filenames are used exactly as they appear in the CSV.
+   *
+   * @see sfTask
+   *
+   * @param array $arguments task arguments; 'filename' is read
+   * @param array $options task options; 'connection' and 'path' are read
+   *
+   * @throws sfException if the CSV file can not be opened
+   */
+  public function execute($arguments = array(), $options = array())
+  {
+    // Honour the --connection option, falling back to the historical default
+    $connectionName = isset($options['connection']) ? $options['connection'] : 'propel';
+
+    $databaseManager = new sfDatabaseManager($this->configuration);
+    $conn = $databaseManager->getDatabase($connectionName)->getConnection();
+
+    sfConfig::set('app_upload_dir', $this->getUploadDir($conn));
+
+    if (false === $fh = fopen($arguments['filename'], 'rb'))
+    {
+      throw new sfException('You must specify a valid filename');
+    }
+
+    // logSection() takes a section label followed by the message
+    $this->logSection('dip-import', "Link digital objects from {$arguments['filename']}...");
+
+    // Get header (first) row; a length of 0 means "no line length limit"
+    $header = fgetcsv($fh, 0);
+
+    // Default: first column is information_object_id and second is filename
+    $idKey = 0;
+    $fileKey = 1;
+
+    // fgetcsv() returns false on an empty file, so guard before searching
+    if (is_array($header) && in_array('information_object_id', $header) && in_array('filename', $header))
+    {
+      $idKey = array_search('information_object_id', $header);
+      $fileKey = array_search('filename', $header);
+    }
+
+    // Build hash of information_object.id => list of file paths
+    $digitalObjects = array();
+    while (false !== $row = fgetcsv($fh, 0))
+    {
+      // Skip blank lines and rows that lack either required column
+      if (!is_array($row) || !isset($row[$idKey], $row[$fileKey]))
+      {
+        continue;
+      }
+
+      $filepath = $row[$fileKey];
+      if (isset($options['path']))
+      {
+        $filepath = rtrim($options['path'], '/').'/'.$filepath;
+      }
+
+      $digitalObjects[$row[$idKey]][] = $filepath;
+    }
+
+    fclose($fh);
+
+    // Loop through $digitalObjects hash and add digital objects to db
+    foreach ($digitalObjects as $key => $paths)
+    {
+      if (null === $informationObject = QubitInformationObject::getById($key))
+      {
+        $this->log("Invalid information_object id $key");
+
+        continue;
+      }
+
+      // A single file is attached directly to the existing information object
+      if (1 == count($paths))
+      {
+        $this->addDigitalObject($informationObject, $paths[0], $conn);
+
+        continue;
+      }
+
+      // If more than one digital object linked to this information object,
+      // create child information objects to maintain a one-to-one
+      // relationship with digital objects
+      foreach ($paths as $path)
+      {
+        // Check before saving the child so a missing file does not leave
+        // behind an information object with no digital object
+        if (!file_exists($path))
+        {
+          $this->log("Couldn't find file '$path'");
+
+          continue;
+        }
+
+        $child = new QubitInformationObject;
+        $child->parent = $informationObject;
+        $child->title = basename($path);
+        $child->save($conn);
+
+        $this->addDigitalObject($child, $path, $conn);
+      }
+    }
+
+    $this->logSection('dip-import', 'Successfully Loaded '.self::$count.' digital objects.');
+  }
+
+  /**
+   * Create and save a master digital object for $filepath, linked to
+   * $informationObject, and increment the loaded-object counter.
+   *
+   * Logs a message and does nothing else when the file does not exist.
+   *
+   * @param QubitInformationObject $informationObject object to link to
+   * @param string $filepath path of the asset file
+   * @param mixed $conn database connection passed through to save()
+   */
+  protected function addDigitalObject($informationObject, $filepath, $conn)
+  {
+    if (!file_exists($filepath))
+    {
+      $this->log("Couldn't find file '$filepath'");
+
+      return;
+    }
+
+    $this->log("Loading '$filepath'");
+
+    // Create digital object
+    $do = new QubitDigitalObject;
+    $do->informationObject = $informationObject;
+    $do->usageId = QubitTerm::MASTER_ID;
+    $do->assets[] = new QubitAsset($filepath);
+    $do->save($conn);
+
+    self::$count++;
+  }
+
+  /**
+   * Look up the "upload_dir" setting (source-culture value) in the database.
+   *
+   * @param mixed $conn database connection exposing query()
+   *
+   * @return string the stored value, or 'uploads' when the query fails or
+   *                returns no row
+   */
+  protected function getUploadDir($conn)
+  {
+    $uploadDir = 'uploads'; // Default value
+
+    $sql = 'SELECT i18n.value
+      FROM setting stg JOIN setting_i18n i18n ON stg.id = i18n.id
+      WHERE stg.source_culture = i18n.culture
+      AND stg.name = \'upload_dir\';';
+
+    if ($sth = $conn->query($sql))
+    {
+      // fetch() yields false when there is no row; keep the default then
+      $row = $sth->fetch();
+      if (is_array($row) && isset($row[0]))
+      {
+        $uploadDir = $row[0];
+      }
+    }
+
+    return $uploadDir;
+  }
+}
--
You received this message because you are subscribed to the Google Groups
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/qubit-commits?hl=en.