Author: david
Date: Mon Jul 9 16:07:22 2012
New Revision: 11878
Log:
Add Transform libraries to codebase. Add flag to preserve rows with no Level
of Description
Added:
trunk/lib/QubitCsvTransform.class.php (contents, props changed)
trunk/lib/QubitCsvTransformFactory.class.php (contents, props changed)
Modified:
trunk/lib/task/import/csvCustomImportTask.class.php
Added: trunk/lib/QubitCsvTransform.class.php
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ trunk/lib/QubitCsvTransform.class.php Mon Jul 9 16:07:22 2012
(r11878)
@@ -0,0 +1,295 @@
+<?php
+
+/**
+ * CSV transformation helper built on QubitFlatfileImport.
+ *
+ * Adds helpers to stage rows in a MySQL scratch table (import_descriptions)
+ * together with a sort order, and to write the staged rows back out as
+ * numbered CSV files ordered by that sort order.
+ */
+class QubitCsvTransform extends QubitFlatfileImport
+{
+  public
+    // value of the 'transformLogic' constructor option, if one was given
+    $transformLogic,
+
+    // recognised levels of description; a level's index in this list is the
+    // sort order returned by levelOfDescriptionToSortorder()
+    $levelsOfDescription = array(
+      'fonds',
+      'collection',
+      'sousfonds',
+      'sous-fonds',
+      'series',
+      'subseries',
+      'file',
+      'item'
+    );
+
+  /**
+   * Constructor.
+   *
+   * Keys of $options handled here:
+   *  - skipOptionsAndEnvironmentCheck: if truthy, skip the CLI option check
+   *  - options: CLI options ('output-file', 'ignore-bad-lod')
+   *  - transformLogic: stored in $this->transformLogic
+   *
+   * The first two are removed before $options is handed to the parent
+   * constructor.
+   *
+   * @param array $options  construction options
+   *
+   * @throws sfException if the check runs and output-file is not set
+   */
+  public function __construct($options = array())
+  {
+    $cliOptions = isset($options['options']) ? $options['options'] : NULL;
+
+    if (empty($options['skipOptionsAndEnvironmentCheck']))
+    {
+      $this->checkTaskOptionsAndEnvironment($cliOptions);
+    }
+
+    // unset options not allowed in parent class (also when 'options' is
+    // present but NULL, which an isset() guard would miss)
+    unset($options['skipOptionsAndEnvironmentCheck'], $options['options']);
+
+    // call parent class constructor
+    parent::__construct($options);
+
+    if (isset($options['transformLogic']))
+    {
+      $this->transformLogic = $options['transformLogic'];
+    }
+
+    if (isset($cliOptions))
+    {
+      // tolerate missing keys instead of raising undefined-index notices
+      $this->status['finalOutputFile'] = isset($cliOptions['output-file'])
+        ? $cliOptions['output-file']
+        : NULL;
+      $this->status['ignoreBadLod'] = isset($cliOptions['ignore-bad-lod'])
+        ? $cliOptions['ignore-bad-lod']
+        : FALSE;
+    }
+
+    $this->status['headersWritten'] = FALSE;
+  }
+
+  /**
+   * Validate the CLI options this class depends on.
+   *
+   * A missing MYSQL_PASSWORD environment variable is not treated as an error
+   * here; initializeMySQLtemp() reports a failed connection itself.
+   *
+   * @param array $options  CLI options; 'output-file' must be non-empty
+   *
+   * @throws sfException if the output-file option is missing or empty
+   */
+  protected function checkTaskOptionsAndEnvironment($options)
+  {
+    if (empty($options['output-file']))
+    {
+      throw new sfException('You must specify the output-file option.');
+    }
+  }
+
+  /**
+   * Write the column-name header row to the 'outFh' handle, once only.
+   */
+  public function writeHeadersOnFirstPass()
+  {
+    if (!$this->status['headersWritten'])
+    {
+      fputcsv($this->status['outFh'], $this->columnNames);
+      $this->status['headersWritten'] = TRUE;
+    }
+  }
+
+  /**
+   * Connect to MySQL and prepare an empty import_descriptions scratch table.
+   *
+   * Connects to localhost as "root" with the password taken from the
+   * MYSQL_PASSWORD environment variable and selects the "import" database.
+   *
+   * NOTE(review): this uses the legacy mysql_* extension, which was removed
+   * in PHP 7.0; porting to mysqli or PDO is required to run on newer PHP.
+   *
+   * @throws sfException if connecting, selecting the database, creating the
+   *                     table or clearing it fails
+   */
+  public function initializeMySQLtemp()
+  {
+    $link = mysql_connect('localhost', 'root', getenv('MYSQL_PASSWORD'));
+
+    if (!$link)
+    {
+      throw new sfException(
+        'MySQL connection failed. Make sure the MYSQL_PASSWORD '
+        .'environmental variable is set.'
+      );
+    }
+
+    if (!mysql_select_db('import', $link))
+    {
+      throw new sfException(
+        'MySQL DB selection failed. Make sure a database called "import" exists.'
+      );
+    }
+
+    $sql = "CREATE TABLE IF NOT EXISTS import_descriptions (
+      id INT NOT NULL AUTO_INCREMENT,
+      sortorder INT,
+      data LONGTEXT,
+      PRIMARY KEY (id)
+    )";
+
+    if (!mysql_query($sql))
+    {
+      throw new sfException('MySQL create table failed.');
+    }
+
+    // start from an empty table; a failure here would leave rows from a
+    // previous run mixed into the output, so it is reported as well
+    if (!mysql_query("DELETE FROM import_descriptions"))
+    {
+      throw new sfException('MySQL table cleanup failed.');
+    }
+  }
+
+  /**
+   * Insert the current row, serialized, into the scratch table.
+   *
+   * @param int $sortorder  sort position stored alongside the row
+   *
+   * @throws sfException if the insert fails
+   */
+  public function addRowToMySQL($sortorder)
+  {
+    $sql = sprintf(
+      "INSERT INTO import_descriptions (sortorder, data) VALUES ('%s', '%s')",
+      mysql_real_escape_string($sortorder),
+      mysql_real_escape_string(serialize($this->status['row']))
+    );
+
+    if (!mysql_query($sql))
+    {
+      throw new sfException('Failed to create MySQL DB row.');
+    }
+  }
+
+  /**
+   * Build a numbered variant of a file path, e.g. "/tmp/out.csv" with
+   * number 2 gives "/tmp/out_2.csv".
+   *
+   * @param string $filename  original file path
+   * @param int    $number    number appended to the base name
+   *
+   * @return string  path with "_<number>" inserted before the extension
+   */
+  public function numberedFilePathVariation($filename, $number)
+  {
+    $parts = pathinfo($filename);
+    $path = $parts['dirname'] .'/'. $parts['filename'] .'_'. $number;
+
+    // pathinfo() leaves out 'extension' when the name has none; skip it
+    // rather than raise a notice and emit a dangling "."
+    if (isset($parts['extension']))
+    {
+      $path .= '.'. $parts['extension'];
+    }
+
+    return $path;
+  }
+
+  /**
+   * Write all rows of the scratch table, ordered by sortorder, to numbered
+   * CSV files derived from $filepath (see numberedFilePathVariation()).
+   *
+   * Every file starts with the column-name header row. A new file is begun
+   * whenever the row counter hits a multiple of 1000; as the counter starts
+   * at 1, the first file holds 999 data rows and later ones up to 1000.
+   *
+   * @param string $filepath  base output path
+   *
+   * @throws sfException if a file cannot be opened or the query fails
+   */
+  public function writeMySQLRowsToCsvFilePath($filepath)
+  {
+    $chunk = 0;
+    $fhOut = $this->openCsvChunk(
+      $this->numberedFilePathVariation($filepath, $chunk)
+    );
+
+    // cycle through DB, sorted by sort order, and write CSV file(s)
+    $sql = "SELECT data FROM import_descriptions ORDER BY sortorder";
+    $result = mysql_query($sql);
+
+    if (!$result)
+    {
+      fclose($fhOut);
+      throw new sfException('Failed to read rows from MySQL.');
+    }
+
+    $currentRow = 1;
+
+    while ($row = mysql_fetch_assoc($result))
+    {
+      // if starting a new chunk, close the finished file first so handles
+      // do not pile up, then open the next one (headers included)
+      if (($currentRow % 1000) == 0)
+      {
+        fclose($fhOut);
+
+        $chunk++;
+        $fhOut = $this->openCsvChunk(
+          $this->numberedFilePathVariation($filepath, $chunk)
+        );
+      }
+
+      // rows were serialized by addRowToMySQL()
+      fputcsv($fhOut, unserialize($row['data']));
+
+      $currentRow++;
+    }
+
+    fclose($fhOut);
+  }
+
+  /**
+   * Open a CSV output file, announce it and write the header row.
+   *
+   * @param string $path  file to create or truncate
+   *
+   * @return resource  handle opened for writing
+   *
+   * @throws sfException if the file cannot be opened
+   */
+  private function openCsvChunk($path)
+  {
+    $fh = fopen($path, 'w');
+
+    if (!$fh)
+    {
+      throw new sfException('Error writing to '. $path .'.');
+    }
+
+    print "Writing to ". $path ."...\n";
+
+    fputcsv($fh, $this->columnNames); // write headers
+
+    return $fh;
+  }
+
+  /**
+   * Map a level of description to its sort position.
+   *
+   * @param string $level  level of description, matched case-insensitively
+   *
+   * @return int|false  index in $levelsOfDescription, FALSE if unrecognised
+   */
+  public function levelOfDescriptionToSortorder($level)
+  {
+    return array_search(strtolower($level), $this->levelsOfDescription, TRUE);
+  }
+}
Added: trunk/lib/QubitCsvTransformFactory.class.php
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ trunk/lib/QubitCsvTransformFactory.class.php Mon Jul 9 16:07:22
2012 (r11878)
@@ -0,0 +1,243 @@
+<?php
+
+/**
+ * Builds a QubitCsvTransform set up for a two-stage CSV transformation.
+ *
+ * Stage 1 applies the transform logic, writes each row to a temporary CSV
+ * file and records parent keys. Stage 2 re-reads that file, fills in parent
+ * IDs, stages the rows in MySQL with a sort order derived from the level of
+ * description and finally writes the output file(s).
+ */
+class QubitCsvTransformFactory
+{
+  public $cliOptions;
+  public $machineName;
+  public $addColumns;
+  public $renameColumns;
+  public $parentKeyLogic;
+  public $rowParentKeyLookupLogic;
+  public $transformLogic;
+
+  /**
+   * Constructor.
+   *
+   * @param array $options  property values, keyed by property name; applied
+   *                        via QubitFlatfileImport::setPropertiesFromArray()
+   *
+   * @throws sfException if machineName is empty
+   */
+  public function __construct($options = array())
+  {
+    $allowedProperties = array(
+      'cliOptions',
+      'machineName',
+      'addColumns',
+      'renameColumns',
+      'parentKeyLogic',
+      'rowParentKeyLookupLogic',
+      'transformLogic'
+    );
+
+    QubitFlatfileImport::setPropertiesFromArray(
+      $this,
+      $options,
+      $allowedProperties
+    );
+
+    if (!$this->machineName)
+    {
+      throw new sfException('The machineName property is required.');
+    }
+  }
+
+  /**
+   * Create the stage 1 transform; its completeLogic builds and runs stage 2.
+   *
+   * @return QubitCsvTransform
+   *
+   * @throws sfException if the temporary stage 1 file cannot be created
+   */
+  public function make()
+  {
+    $tempCsvFile = sys_get_temp_dir() .'/'. $this->machineName .'_stage1.csv';
+
+    // fail early instead of handing an invalid handle to every fputcsv()
+    $outFh = fopen($tempCsvFile, 'w');
+
+    if (!$outFh)
+    {
+      throw new sfException('Error writing to '. $tempCsvFile .'.');
+    }
+
+    return new QubitCsvTransform(array(
+
+      'options' => $this->cliOptions,
+
+      'status' => array(
+        'parentKeys' => array(),
+        'noIdentifierCount' => 0,
+        'tempFile' => $tempCsvFile,
+        'outFh' => $outFh,
+        'parentKeyLogic' => $this->parentKeyLogic,
+        'rowParentKeyLookupLogic' => $this->rowParentKeyLookupLogic
+      ),
+
+      'transformLogic' => $this->transformLogic,
+
+      'addColumns' => $this->addColumns,
+
+      'renameColumns' => $this->renameColumns,
+
+      // stage 1, per row: remember the row's parent key, apply the
+      // transform logic and append the row to the temporary CSV file
+      'saveLogic' => function(&$self)
+      {
+        $self->writeHeadersOnFirstPass();
+
+        if (isset($self->status['parentKeyLogic']))
+        {
+          $parentKey = trim($self->status['parentKeyLogic']($self));
+          if ($parentKey)
+          {
+            print "Stored parent key...\n";
+            $self->status['parentKeys'][$parentKey]
+              = $self->columnValue('legacyId');
+          }
+        }
+
+        if (isset($self->transformLogic))
+        {
+          $self->executeClosurePropertyIfSet('transformLogic');
+        }
+
+        fputcsv($self->status['outFh'], $self->status['row']);
+      },
+
+      // stage 1, end of input: run a second transform over the temporary
+      // file to resolve parent IDs and order the rows
+      'completeLogic' => function(&$self)
+      {
+        print "Step 1 complete.\n";
+
+        // make sure everything stage 1 wrote is on disk before reading it
+        if (is_resource($self->status['outFh']))
+        {
+          fflush($self->status['outFh']);
+        }
+
+        $fhIn = fopen($self->status['tempFile'], 'r');
+
+        if (!$fhIn)
+        {
+          throw new sfException(
+            'Error reading '. $self->status['tempFile'] .'.'
+          );
+        }
+
+        $self->initializeMySQLtemp();
+
+        $stage2 = new QubitCsvTransform(array(
+
+          'skipOptionsAndEnvironmentCheck' => TRUE,
+
+          'status' => array(
+            'finalOutputFile' => $self->status['finalOutputFile'],
+            'parentKeys' => $self->status['parentKeys'],
+            'badParents' => 0,
+            'tempFile' => $self->status['tempFile'],
+            'badLevelOfDescription' => 0,
+            'rowParentKeyLookupLogic'
+              => $self->status['rowParentKeyLookupLogic'],
+            'ignoreBadLod' => $self->status['ignoreBadLod']
+          ),
+
+          'errorLog' => $self->errorLog,
+
+          // stage 2, per row: set parentId from the keys gathered in stage 1
+          // and stage the row in MySQL under its sort order
+          'saveLogic' => function(&$self)
+          {
+            if (isset($self->status['rowParentKeyLookupLogic']))
+            {
+              $keyOfRowParent = trim(
+                $self->status['rowParentKeyLookupLogic']($self)
+              );
+
+              if (
+                $keyOfRowParent
+                && isset($self->status['parentKeys'][$keyOfRowParent])
+              )
+              {
+                $parentId = $self->status['parentKeys'][$keyOfRowParent];
+                print "Found parent ID ". $parentId ."\n";
+                $self->columnValue('parentId', $parentId);
+              }
+              else
+              {
+                $self->status['badParents']++;
+              }
+            }
+
+            $levelOfDescriptionAvailable = in_array(
+              'levelOfDescription',
+              $self->columnNames,
+              TRUE
+            );
+
+            if (!$levelOfDescriptionAvailable)
+            {
+              $self->addRowToMySQL(0);
+              return;
+            }
+
+            print "Found a level of description...\n";
+
+            $level = $self->columnValue('levelOfDescription');
+            $sortorder = $self->levelOfDescriptionToSortorder($level);
+
+            // unrecognised level kept on request: sort it after all the
+            // recognised levels
+            if (!is_numeric($sortorder) && !empty($self->status['ignoreBadLod']))
+            {
+              $sortorder = count($self->levelsOfDescription);
+            }
+
+            if (is_numeric($sortorder))
+            {
+              print "Description sort order is ". $sortorder .".\n";
+              $self->addRowToMySQL($sortorder);
+            }
+            else
+            {
+              $self->status['badLevelOfDescription']++;
+              print "Ignoring data with bad level of description: '"
+                . $level ."'.\n";
+            }
+          },
+
+          // stage 2, end of input: write the ordered rows and report counts
+          'completeLogic' => function(&$self)
+          {
+            $self->writeMySQLRowsToCsvFilePath(
+              $self->status['finalOutputFile']
+            );
+
+            print "Step 2 complete.\n";
+            print "Bad parents found: ". $self->status['badParents'] .".\n";
+            print "Bad level of description found: "
+              . $self->status['badLevelOfDescription'] .".\n";
+          }
+        ));
+
+        $stage2->csv($fhIn);
+
+        // NOTE(review): csv() lives in the parent class (not visible here)
+        // and may already have closed the handle, hence the guard
+        if (is_resource($fhIn))
+        {
+          fclose($fhIn);
+        }
+      }
+    ));
+  }
+}
Modified: trunk/lib/task/import/csvCustomImportTask.class.php
==============================================================================
--- trunk/lib/task/import/csvCustomImportTask.class.php Mon Jul 9 14:50:25
2012 (r11877)
+++ trunk/lib/task/import/csvCustomImportTask.class.php Mon Jul 9 16:07:22
2012 (r11878)
@@ -45,7 +45,8 @@
$this->addOptions(array(
new sfCommandOption('import-definition', null,
sfCommandOption::PARAMETER_REQUIRED, 'PHP file defining and returning an import
object.'),
new sfCommandOption('output-file', null,
sfCommandOption::PARAMETER_OPTIONAL, 'Optional output file parameter which can
be referenced by import definition logic.'),
- new sfCommandOption('source-name', null,
sfCommandOption::PARAMETER_OPTIONAL, 'Source name to use when inserting keymap
entries.')
+ new sfCommandOption('source-name', null,
sfCommandOption::PARAMETER_OPTIONAL, 'Source name to use when inserting keymap
entries.'),
+ new sfCommandOption('ignore-bad-lod', null,
sfCommandOption::PARAMETER_NONE, 'Add rows with an unrecognized level of
description to end of file, instead of dropping them.')
));
}
--
You received this message because you are subscribed to the Google Groups
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/qubit-commits?hl=en.