Author: david
Date: Mon Jul  9 16:07:22 2012
New Revision: 11878

Log:
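Add Transform libraries to codebase. Add an ignore-bad-lod flag that keeps
rows with a missing or unrecognized Level of Description (appended to the end
of the output) instead of dropping them.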

Added:
   trunk/lib/QubitCsvTransform.class.php   (contents, props changed)
   trunk/lib/QubitCsvTransformFactory.class.php   (contents, props changed)
Modified:
   trunk/lib/task/import/csvCustomImportTask.class.php

Added: trunk/lib/QubitCsvTransform.class.php
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ trunk/lib/QubitCsvTransform.class.php       Mon Jul  9 16:07:22 2012        (r11878)
@@ -0,0 +1,170 @@
+<?php
+
+/**
+ * Flat-file import subclass used to transform CSV data.
+ *
+ * Provides helpers that stage rows in a scratch MySQL table
+ * ("import_descriptions" in a database named "import") together with a
+ * caller-supplied sort order, and then write the staged rows back out, in
+ * sort order, as a series of numbered CSV files.
+ *
+ * NOTE(review): the mysql_* functions used here belong to the legacy
+ * ext/mysql API, which was removed in PHP 7; migrating to PDO or mysqli
+ * needs a shared connection handle and is left for a separate change.
+ */
+class QubitCsvTransform extends QubitFlatfileImport {
+
+  public
+    // optional closure holding row transformation logic (set from options)
+    $transformLogic,
+
+    // recognized levels of description; a value's index in this list is
+    // what levelOfDescriptionToSortorder() returns
+    $levelsOfDescription = array(
+      'fonds',
+      'collection',
+      'sousfonds',
+      'sous-fonds',
+      'series',
+      'subseries',
+      'file',
+      'item'
+    );
+
+
+  /**
+   * Validate CLI options (unless told to skip), then initialize the import.
+   *
+   * Keys of $options handled here and removed before calling the parent:
+   *   - skipOptionsAndEnvironmentCheck: if set and truthy, skip validation
+   *   - options: CLI option array; 'output-file' and 'ignore-bad-lod' are
+   *     copied into $this->status
+   *
+   * 'transformLogic', if present, is stored on $this->transformLogic.
+   *
+   * @param array $options  import options; keys not listed above are passed
+   *                        on to the parent constructor
+   *
+   * @throws sfException  if validation runs and no output file was given
+   */
+  public function __construct($options = array())
+  {
+    // may legitimately be absent (e.g. for the second transform stage)
+    $cliOptions = isset($options['options']) ? $options['options'] : null;
+
+    if (empty($options['skipOptionsAndEnvironmentCheck']))
+    {
+      $this->checkTaskOptionsAndEnvironment(
+        ($cliOptions !== null) ? $cliOptions : array()
+      );
+    }
+
+    // unset options not allowed in parent class
+    unset($options['skipOptionsAndEnvironmentCheck'], $options['options']);
+
+    // call parent class constructor
+    parent::__construct($options);
+
+    if (isset($options['transformLogic']))
+    {
+      $this->transformLogic = $options['transformLogic'];
+    }
+
+    if ($cliOptions !== null)
+    {
+      $this->status['finalOutputFile'] = isset($cliOptions['output-file'])
+        ? $cliOptions['output-file']
+        : null;
+      $this->status['ignoreBadLod'] = isset($cliOptions['ignore-bad-lod'])
+        ? $cliOptions['ignore-bad-lod']
+        : FALSE;
+    }
+
+    $this->status['headersWritten'] = FALSE;
+  }
+
+  /**
+   * Make sure the options needed by the transform were supplied.
+   *
+   * The MYSQL_PASSWORD environment variable is deliberately not enforced
+   * here; initializeMySQLtemp() reports a failed connection itself.
+   *
+   * @param array $options  CLI options
+   *
+   * @throws sfException  if the 'output-file' option is missing or empty
+   */
+  protected function checkTaskOptionsAndEnvironment($options)
+  {
+    if (empty($options['output-file']))
+    {
+      throw new sfException('You must specify the output-file option.');
+    }
+  }
+
+  /**
+   * Write the CSV header row to $this->status['outFh'] the first time this
+   * is called; later calls do nothing.
+   */
+  public function writeHeadersOnFirstPass()
+  {
+    if (!$this->status['headersWritten'])
+    {
+      fputcsv($this->status['outFh'], $this->columnNames);
+      $this->status['headersWritten'] = TRUE;
+    }
+  }
+
+  /**
+   * Connect to MySQL and prepare an empty import_descriptions table.
+   *
+   * Connects to localhost as "root" using the password in the
+   * MYSQL_PASSWORD environment variable, selects the "import" database,
+   * creates the table if it does not exist and removes any existing rows.
+   *
+   * @throws sfException  if any of those steps fails
+   */
+  public function initializeMySQLtemp()
+  {
+    $link = mysql_connect('localhost', 'root', getenv("MYSQL_PASSWORD"));
+
+    if (!$link) throw new sfException('MySQL connection failed. Make sure the MYSQL_PASSWORD environmental variable is set.');
+
+    $db = mysql_select_db('import', $link);
+
+    if (!$db) throw new sfException(
+      'MySQL DB selection failed. Make sure a database called "import" exists.'
+    );
+
+    $sql = "CREATE TABLE IF NOT EXISTS import_descriptions (
+      id INT NOT NULL AUTO_INCREMENT,
+      sortorder INT,
+      data LONGTEXT,
+      PRIMARY KEY (id)
+    )";
+
+    $result = mysql_query($sql);
+
+    if (!$result) throw new sfException('MySQL create table failed.');
+
+    // rows left over from an earlier run would end up in this run's output
+    $result = mysql_query("DELETE FROM import_descriptions");
+
+    if (!$result) throw new sfException('MySQL clearing of import_descriptions table failed.');
+  }
+
+  /**
+   * Store the current row ($this->status['row'], serialized) in the
+   * import_descriptions table.
+   *
+   * @param mixed $sortorder  value stored in the sortorder column
+   *
+   * @throws sfException  if the insert fails
+   */
+  public function addRowToMySQL($sortorder)
+  {
+    $sql = "INSERT INTO import_descriptions
+        (sortorder, data)
+        VALUES ('". mysql_real_escape_string($sortorder) ."',
+        '". mysql_real_escape_string(serialize($this->status['row'])) ."')";
+
+    $result = mysql_query($sql);
+
+    if (!$result)
+    {
+      throw new sfException('Failed to create MySQL DB row.');
+    }
+  }
+
+  /**
+   * Insert "_<number>" before the extension of a file path, e.g.
+   * "/tmp/out.csv" and 2 give "/tmp/out_2.csv".
+   *
+   * A path without an extension gets the suffix only ("/tmp/out_2").
+   *
+   * @param string $filename  file path
+   * @param mixed  $number    suffix to add
+   *
+   * @return string  the numbered path
+   */
+  public function numberedFilePathVariation($filename, $number)
+  {
+    $parts    = pathinfo($filename);
+    $numbered = $parts['dirname'] .'/'. $parts['filename'] .'_'. $number;
+
+    // pathinfo() omits the 'extension' key when the path has none
+    if (isset($parts['extension']) && $parts['extension'] !== '')
+    {
+      $numbered .= '.'. $parts['extension'];
+    }
+
+    return $numbered;
+  }
+
+  /**
+   * Write all staged rows, ordered by sortorder, to numbered CSV files
+   * ("<name>_0.<ext>", "<name>_1.<ext>", ...), each starting with the header
+   * row and holding at most $rowsPerFile data rows.
+   *
+   * @param string $filepath     output path the numbered file names are
+   *                             derived from
+   * @param int    $rowsPerFile  maximum data rows per file (minimum 1)
+   *
+   * @throws sfException  if the query fails or a file cannot be opened
+   */
+  public function writeMySQLRowsToCsvFilePath($filepath, $rowsPerFile = 1000)
+  {
+    $rowsPerFile = max(1, (int) $rowsPerFile);
+
+    // cycle through DB, sorted by sort order
+    $result = mysql_query("SELECT data FROM import_descriptions ORDER BY sortorder");
+
+    if (!$result) throw new sfException('MySQL select from import_descriptions failed.');
+
+    $chunk      = 0;
+    $fhOut      = $this->openNumberedCsvFile($filepath, $chunk);
+    $rowsInFile = 0;
+
+    while ($row = mysql_fetch_assoc($result))
+    {
+      // current file is full: close it and start the next chunk
+      if ($rowsInFile >= $rowsPerFile)
+      {
+        fclose($fhOut);
+
+        $chunk++;
+        $fhOut      = $this->openNumberedCsvFile($filepath, $chunk);
+        $rowsInFile = 0;
+      }
+
+      // rows were stored with serialize() by addRowToMySQL()
+      fputcsv($fhOut, unserialize($row['data']));
+
+      $rowsInFile++;
+    }
+
+    fclose($fhOut);
+    mysql_free_result($result);
+  }
+
+  /**
+   * Open a numbered output file for writing, announce it and write the CSV
+   * header row.
+   *
+   * @param string $filepath  base output path
+   * @param int    $number    chunk number
+   *
+   * @return resource  open file handle
+   *
+   * @throws sfException  if the file cannot be opened
+   */
+  protected function openNumberedCsvFile($filepath, $number)
+  {
+    $path = $this->numberedFilePathVariation($filepath, $number);
+    $fh   = fopen($path, 'w');
+
+    if (!$fh) throw new sfException('Error writing to '. $path .'.');
+
+    print "Writing to ". $path ."...\n";
+
+    fputcsv($fh, $this->columnNames); // write headers
+
+    return $fh;
+  }
+
+  /**
+   * Look up the position of a level of description in
+   * $this->levelsOfDescription (case-insensitive, surrounding whitespace
+   * ignored).
+   *
+   * @param string $level  level of description
+   *
+   * @return int|false  zero-based position, or FALSE if not recognized;
+   *                    callers must not treat 0 as a failure
+   */
+  public function levelOfDescriptionToSortorder($level)
+  {
+    return array_search(
+      strtolower(trim($level)),
+      $this->levelsOfDescription,
+      TRUE
+    );
+  }
+}

Added: trunk/lib/QubitCsvTransformFactory.class.php
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ trunk/lib/QubitCsvTransformFactory.class.php        Mon Jul  9 16:07:22 2012        (r11878)
@@ -0,0 +1,163 @@
+<?php
+
+/**
+ * Builds a two-stage QubitCsvTransform.
+ *
+ * Stage 1 writes each (optionally transformed) row to a temporary CSV file
+ * and records parent keys. When stage 1 completes, stage 2 re-reads that
+ * file, fills in parentId from the recorded keys, stages the rows in MySQL
+ * with a sort order derived from the level of description, and finally
+ * writes them to the output file(s).
+ */
+class QubitCsvTransformFactory {
+
+  public $cliOptions;
+  public $machineName;
+  public $addColumns;
+  public $renameColumns;
+  public $parentKeyLogic;
+  public $rowParentKeyLookupLogic;
+  public $transformLogic;
+
+  /**
+   * @param array $options  values for the public properties of this class,
+   *                        keyed by property name
+   *
+   * @throws sfException  if machineName is not set
+   */
+  public function __construct($options = array())
+  {
+    $allowedProperties = array(
+      'cliOptions',
+      'machineName',
+      'addColumns',
+      'renameColumns',
+      'parentKeyLogic',
+      'rowParentKeyLookupLogic',
+      'transformLogic'
+    );
+
+    QubitFlatfileImport::setPropertiesFromArray(
+      $this,
+      $options,
+      $allowedProperties
+    );
+
+    // machineName is used to name the temporary file in make()
+    if (!$this->machineName)
+    {
+      throw new sfException('The machineName property is required.');
+    }
+  }
+
+  /**
+   * Create the stage 1 transform; its completeLogic creates and runs
+   * stage 2.
+   *
+   * @return QubitCsvTransform
+   *
+   * @throws sfException  if the temporary file cannot be opened for writing
+   */
+  public function make()
+  {
+    $tempCsvFile = sys_get_temp_dir() .'/'. $this->machineName .'_stage1.csv';
+
+    $tempFh = fopen($tempCsvFile, 'w');
+
+    if (!$tempFh)
+    {
+      throw new sfException('Error writing to '. $tempCsvFile .'.');
+    }
+
+    return new QubitCsvTransform(array(
+
+      'options' => $this->cliOptions,
+
+      'status' => array(
+        'parentKeys'              => array(),
+        'noIdentifierCount'       => 0,
+        'tempFile'                => $tempCsvFile,
+        'outFh'                   => $tempFh,
+        'parentKeyLogic'          => $this->parentKeyLogic,
+        'rowParentKeyLookupLogic' => $this->rowParentKeyLookupLogic
+      ),
+
+      'transformLogic' => $this->transformLogic,
+
+      'addColumns' => $this->addColumns,
+
+      'renameColumns' => $this->renameColumns,
+
+      // stage 1, per row: remember the row's parent key (if any), apply the
+      // optional transform and append the row to the temporary CSV file
+      'saveLogic' => function(&$self)
+      {
+        $self->writeHeadersOnFirstPass();
+
+        if (isset($self->status['parentKeyLogic']))
+        {
+          $parentKey = trim($self->status['parentKeyLogic']($self));
+          if ($parentKey)
+          {
+            print "Stored parent key...\n";
+            $self->status['parentKeys'][$parentKey] = $self->columnValue('legacyId');
+          }
+        }
+
+        if (isset($self->transformLogic))
+        {
+          $self->executeClosurePropertyIfSet('transformLogic');
+        }
+
+        fputcsv($self->status['outFh'], $self->status['row']);
+      },
+
+      // stage 1 finished: run stage 2 over the temporary CSV file
+      'completeLogic' => function(&$self)
+      {
+        print "Step 1 complete.\n";
+
+        // close the stage 1 write handle so the file is complete on disk
+        // (and the handle is not leaked) before it is read back in
+        if (is_resource($self->status['outFh']))
+        {
+          fclose($self->status['outFh']);
+        }
+
+        $fhIn = fopen($self->status['tempFile'], 'r');
+
+        if (!$fhIn) throw new sfException('Error reading '. $self->status['tempFile'] .'.');
+
+        $self->initializeMySQLtemp();
+
+        $stage2 = new QubitCsvTransform(array(
+
+          // CLI options were already validated when stage 1 was created
+          'skipOptionsAndEnvironmentCheck' => TRUE,
+
+          'status' => array(
+            'finalOutputFile'  => $self->status['finalOutputFile'],
+            'parentKeys'       => $self->status['parentKeys'],
+            'badParents'       => 0,
+            'tempFile'         => $self->status['tempFile'],
+            'badLevelOfDescription' => 0,
+            'rowParentKeyLookupLogic' => $self->status['rowParentKeyLookupLogic'],
+            'ignoreBadLod' => $self->status['ignoreBadLod']
+          ),
+
+          'errorLog' => $self->errorLog,
+
+          // stage 2, per row: resolve the parent ID, then stage the row in
+          // MySQL with its sort order ($self is the stage 2 object here)
+          'saveLogic' => function(&$self)
+          {
+            if (isset($self->status['rowParentKeyLookupLogic']))
+            {
+              $keyOfRowParent = trim($self->status['rowParentKeyLookupLogic']($self));
+              if ($keyOfRowParent && isset($self->status['parentKeys'][$keyOfRowParent])) {
+                $parentId = $self->status['parentKeys'][$keyOfRowParent];
+                print "Found parent ID ". $parentId ."\n";
+                $self->columnValue('parentId', $parentId);
+              } else {
+                // counts unknown keys and also rows whose lookup gave no key
+                $self->status['badParents']++;
+              }
+            }
+
+            $levelOfDescriptionAvailable = is_numeric(array_search('levelOfDescription', $self->columnNames));
+
+            if ($levelOfDescriptionAvailable)
+            {
+              print "Found a level of description...\n";
+
+              $sortorder = $self->levelOfDescriptionToSortorder($self->columnValue('levelOfDescription'));
+
+              // is_numeric() rather than a truth test: 0 is a valid position
+              if (is_numeric($sortorder))
+              {
+                print "Description sort order is ". $sortorder .".\n";
+                $self->addRowToMySQL($sortorder);
+              }
+              else if (isset($self->status['ignoreBadLod']) && $self->status['ignoreBadLod'])
+              {
+                // unrecognized level: sort after every recognized level
+                $sortorder = count($self->levelsOfDescription);
+                print "Description sort order is ". $sortorder .".\n";
+                $self->addRowToMySQL($sortorder);
+              } else {
+                $self->status['badLevelOfDescription']++;
+                print "Ignoring data with bad level of description: '". $self->columnValue('levelOfDescription') . "'.\n";
+              }
+            } else {
+              // no levelOfDescription column at all: keep every row
+              $self->addRowToMySQL(0);
+            }
+          },
+
+          // stage 2 finished: write the sorted rows out and report counts
+          'completeLogic' => function(&$self)
+          {
+            $self->writeMySQLRowsToCsvFilePath($self->status['finalOutputFile']);
+
+            print "Step 2 complete.\n";
+            print "Bad parents found: ". $self->status['badParents'] .".\n";
+            print "Bad level of description found: ". $self->status['badLevelOfDescription'] .".\n";
+          }
+        ));
+
+        $stage2->csv($fhIn);
+      }
+    ));
+  }
+}

Modified: trunk/lib/task/import/csvCustomImportTask.class.php
==============================================================================
--- trunk/lib/task/import/csvCustomImportTask.class.php Mon Jul  9 14:50:25 2012        (r11877)
+++ trunk/lib/task/import/csvCustomImportTask.class.php Mon Jul  9 16:07:22 2012        (r11878)
@@ -45,7 +45,8 @@
     $this->addOptions(array(
       new sfCommandOption('import-definition', null, sfCommandOption::PARAMETER_REQUIRED, 'PHP file defining and returning an import object.'),
       new sfCommandOption('output-file', null, sfCommandOption::PARAMETER_OPTIONAL, 'Optional output file parameter which can be referenced by import definition logic.'),
-      new sfCommandOption('source-name', null, sfCommandOption::PARAMETER_OPTIONAL, 'Source name to use when inserting keymap entries.')
+      new sfCommandOption('source-name', null, sfCommandOption::PARAMETER_OPTIONAL, 'Source name to use when inserting keymap entries.'),
+      new sfCommandOption('ignore-bad-lod', null, sfCommandOption::PARAMETER_NONE, 'Add rows with an unrecognized level of description to end of file, instead of dropping them.')
     ));
   }
 

-- 
You received this message because you are subscribed to the Google Groups 
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/qubit-commits?hl=en.

Reply via email to