Author: mj
Date: Thu Mar 29 13:46:14 2012
New Revision: 11309

Log:
Issue 2199.  Modify ES plugin to use PDO methods for reading information 
objects when running qubitPopulate (i.e. the way David's 
QubitSearchInformationObject class works).  Known issues: does not work 
correctly for full I18n; the same must be implemented for actors.  Update 
xfPopulateTask to use -v flag and hide per-object output by default for better 
performance.

Added:
   
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
      - copied, changed from r11306, 
trunk/lib/QubitSearchInformationObject.class.php
Modified:
   trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
   trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php
   
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
   trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
   trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php

Modified: 
trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
==============================================================================
--- trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php 
Thu Mar 29 11:38:46 2012        (r11308)
+++ trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php 
Thu Mar 29 13:46:14 2012        (r11309)
@@ -186,7 +186,7 @@
               }
               break;
 
-            case 'dates.start':
+            case 'dates.startDate':
               $facets[strtr($name, '.', '_')] = $facet['ranges'][0];  // 
FIXME: is this the best way?
 
               break;
@@ -279,8 +279,8 @@
       $query->addFacet($facet);
     }
 
-    $facet = new Elastica_Facet_Range('dates.start');
-    $facet->setField('dates.start');
+    $facet = new Elastica_Facet_Range('dates.startDate');
+    $facet->setField('dates.startDate');
     $facet->addRange(null, null);
     $query->addFacet($facet);
 

Modified: 
trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php
==============================================================================
--- trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php  
Thu Mar 29 11:38:46 2012        (r11308)
+++ trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php  
Thu Mar 29 13:46:14 2012        (r11309)
@@ -102,7 +102,7 @@
         <h2 class="desktoponly"><?php echo __('Creation Date'); ?></h2>
 
         <div class="scrollable dates" id="dates">
-          <input type="text" value="<?php echo 
$pager->facets['dates_start']['min'] ?>" name="from" /> - <input type="text" 
value="<?php echo $pager->facets['dates_start']['max'] ?>" name="to" />
+          <input type="text" value="<?php echo 
$pager->facets['dates_startDate']['min'] ?>" name="from" /> - <input 
type="text" value="<?php echo $pager->facets['dates_startDate']['max'] ?>" 
name="to" />
         </div>
       </div><!-- /.section -->
       
@@ -245,7 +245,7 @@
             </p>
 
             <p>
-              <?php echo Qubit::renderDateStartEnd(null, 
$doc['dates'][0]['start'], $doc['dates'][0]['end']); ?>
+              <?php echo Qubit::renderDateStartEnd(null, 
$doc['dates'][0]['startDate'], $doc['dates'][0]['endDate']); ?>
               <?php if 
(!empty($doc[$doc['sourceCulture']]['creator'][0]['name'])): ?>
                 <?php echo __('by %1%', // FIXME: ARRAY ENUMERATION
                   array('%1%' => 
$doc[$sf_user->getCulture()]['creator'][0]['name'] ?: 
$doc[$doc['sourceCulture']]['creator'][0]['name'])); ?>

Modified: 
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
==============================================================================
--- 
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
       Thu Mar 29 11:38:46 2012        (r11308)
+++ 
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
       Thu Mar 29 13:46:14 2012        (r11309)
@@ -105,8 +105,8 @@
         // NB: this doesn't work correctly on 3-date ranges, eg. 1999-2000, 
2001
         preg_match("/^.*(?P<start>\d{4}).*(?P<end>\d{4}?).*/", $rendered, 
$matches);
 
-        $dateIndex['start'] = $matches['start'];
-        $dateIndex['end'] = $matches['end'];
+        $dateIndex['startDate'] = $matches['start'];
+        $dateIndex['endDate'] = $matches['end'];
         $dateIndex['typeId'] = $date->getType()->id;
 
         if (isset($date->actor))

Copied and modified: 
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
 (from r11306, trunk/lib/QubitSearchInformationObject.class.php)
==============================================================================
--- trunk/lib/QubitSearchInformationObject.class.php    Thu Mar 29 00:03:24 
2012        (r11306, copy source)
+++ 
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
   Thu Mar 29 13:46:14 2012        (r11309)
@@ -18,14 +18,13 @@
  */
 
 /**
- * Manage information objects in search index
+ * Lightweight version of QubitInformationObject which uses PDO directly 
instead of the Propel ORM
  *
- * @package    Qubit
- * @subpackage QubitSearch
- * @author     David Juhasz <[email protected]>
+ * @package    qtElasticSearchPlugin
+ * @author     MJ Suhonos <[email protected]>
  * @version    SVN: $Id$
  */
-class QubitSearchInformationObject
+class QubitPdoInformationObject
 {
   public
     $ancestors,
@@ -61,7 +60,7 @@
       'extent_and_medium',
       'class_name',
       'collection_root_slug',
-      'culture',
+//      'culture',
       'finding_aids',
       'has_digital_object',
       'identifier',
@@ -99,7 +98,7 @@
   /**
    * METHODS
    */
-  public function __construct($id, $culture, $options = array())
+  public function __construct($id, $options = array())
   {
     if (isset($options['conn']))
     {
@@ -111,7 +110,7 @@
       self::$conn = Propel::getConnection();
     }
 
-    $this->loadData($id, $culture, $options);
+    $this->loadData($id, $options);
 
     // Get inherited ancestors
     if (isset($options['ancestors']))
@@ -122,11 +121,8 @@
     // Get inherited repository, unless a repository is set at current level
     if (isset($options['repository']) && !$this->__isset('repository_id'))
     {
-      $resource->repository = $options['repository'];
+      $this->repository = $options['repository'];
     }
-
-    $this->index = QubitSearch::getInstance()->getEngine()->getIndex();
-    $this->doc = new Zend_Search_Lucene_Document;
   }
 
   public function __isset($name)
@@ -152,7 +148,7 @@
     $this->data[$name] = $value;
   }
 
-  protected function loadData($id, $culture, $options = array())
+  protected function loadData($id)
   {
     if (!isset(self::$statements['informationObject']))
     {
@@ -172,8 +168,8 @@
          ON io.id = pubstat.object_id
        LEFT JOIN '.QubitDigitalObject::TABLE_NAME.' do
          ON io.id = do.information_object_id
-       WHERE io.id = :id
-         AND i18n.culture = :culture';
+       WHERE io.id = :id';
+//         AND i18n.culture = :culture';
 
       self::$statements['informationObject'] = self::$conn->prepare($sql);
     }
@@ -181,7 +177,7 @@
     // Do select
     self::$statements['informationObject']->execute(array(
       ':id' => $id,
-      ':culture' => $culture));
+));//      ':culture' => $culture));
 
     // Get first result
     $this->data = 
self::$statements['informationObject']->fetch(PDO::FETCH_ASSOC);
@@ -196,273 +192,6 @@
     return $this;
   }
 
-  public function addToIndex()
-  {
-    // Pre-populate
-    $this->getAncestors();
-    $this->getRepository();
-    $this->getLanguagesAndScripts();
-
-    // Add fields
-    foreach (self::$fields as $name)
-    {
-      $this->addField($name);
-    }
-
-    $this->addDocument();
-  }
-
-  public function addDocument()
-  {
-    $this->index->addDocument($this->doc);
-  }
-
-  protected function addField($name)
-  {
-    $camelName = lcfirst(sfInflector::camelize($name));
-    $field = $value = null;
-
-    switch ($name)
-    {
-      case 'class_name':
-        $field = Zend_Search_Lucene_Field::Keyword($camelName, 
'QubitInformationObject');
-
-        break;
-
-      case 'collection_root_slug':
-        $field = Zend_Search_Lucene_Field::Keyword($camelName, 
$this->getCollectionRoot()->slug);
-
-        break;
-
-      case 'creator':
-        $names = array();
-        foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID)) 
as $item)
-        {
-          if (isset($item->authorized_form_of_name))
-          {
-            $names[] = $item->authorized_form_of_name;
-          }
-        }
-
-        // Add field
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, implode(' ', 
$names));
-        $field->boost = 8; // Boost the relevance
-
-        break;
-
-      case 'creator_history':
-        $histories = array();
-        foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID)) 
as $item)
-        {
-          if (isset($item->history))
-          {
-            $names[] = $item->history;
-          }
-        }
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, implode(' ', 
$histories));
-
-        break;
-
-      // Serialized creator data for creating links in search results
-      case 'creator_serialized':
-        $creators = array();
-        foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID)) 
as $item)
-        {
-          $creators[] = array(
-            'name' => $item->authorized_form_of_name,
-            'slug' => $item->slug
-          );
-        }
-
-        $field = Zend_Search_Lucene_Field::UnIndexed($camelName, 
serialize($creators));
-
-        break;
-
-      // Serialized date array for display in search results
-      case 'date_serialized':
-        $field = Zend_Search_Lucene_Field::UnIndexed($camelName, 
serialize($this->getDates('array')));
-
-        break;
-
-      case 'has_digital_object':
-        $field = Zend_Search_Lucene_Field::Keyword($camelName, 
$this->__isset('digital_object_id') ? 'true' : 'false');
-
-        break;
-
-      case 'identifier':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->__get('identifier'));
-        $field->boost = 5;
-
-        break;
-
-      case 'language':
-        if (0 < count($this->languages))
-        {
-          $value = implode(' ', $this->languages);
-        }
-
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, $value);
-
-        break;
-
-      case 'level_of_description':
-        $field = Zend_Search_Lucene_Field::Text($camelName, 
$this->getLevelOfDescription());
-
-        break;
-
-      case 'material_type_id':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->getMaterialTypeId());
-
-        break;
-
-      case 'media_type':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->getMediaTypeName());
-
-        break;
-
-      case 'thumbnail_path':
-        $field = Zend_Search_Lucene_Field::UnIndexed($camelName, 
$this->getThumbnailPath());
-
-        break;
-
-      case 'name':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->getNameAccessPoints());
-        $field->boost = 3;
-
-        break;
-
-      case 'notes':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->getNotes());
-
-        break;
-
-      case 'parent':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->ancestors[count($this->ancestors)-1]->slug);
-
-        break;
-
-      case 'part_of':
-        $field = Zend_Search_Lucene_Field::Text($camelName, 
$this->getCollectionRoot()->getTitle(array('culture' => 
$this->__get('culture'))));
-
-        break;
-
-      case 'physical_storage':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->getStorageNames());
-
-        break;
-
-      case 'place':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->getPlaceAccessPoints());
-        $field->boost = 3;
-
-        break;
-
-      case 'reference_code':
-        $field = Zend_Search_Lucene_Field::Text($camelName, 
$this->getReferenceCode());
-
-        break;
-
-      case 'repository':
-        if (isset($this->repository))
-        {
-          $value = $this->repository->getAuthorizedFormOfName(array('culture' 
=> $this->__get('culture'), 'fallback' => true));
-        }
-
-        $field = Zend_Search_Lucene_Field::Text($camelName, $value);
-
-        break;
-
-      case 'repository_id':
-        if (isset($this->repository))
-        {
-          $value = $this->repository->id;
-        }
-
-        $field = Zend_Search_Lucene_Field::Keyword($camelName, $value);
-
-        break;
-
-      case 'repository_slug':
-        if (isset($this->repository))
-        {
-          $value = $this->repository->slug;
-        }
-
-        $field = Zend_Search_Lucene_Field::Keyword($camelName, $value);
-
-        break;
-
-      case 'subject':
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->getSubjectAccessPoints());
-        $field->boost = 5;
-
-        break;
-
-      case 'script':
-        if (0 < count($this->scripts))
-        {
-          $value = implode(' ', $this->scripts);
-        }
-
-        $field = Zend_Search_Lucene_Field::Unstored($camelName, $value);
-
-        break;
-
-      case 'title':
-        $value = $this->__get('title');
-        if (0 == strlen($value))
-        {
-          // Include an i18n fallback for proper search result display in case 
the
-          // title field was not translated
-          $value = $this->getFallbackTitle();
-        }
-
-        $field = Zend_Search_Lucene_Field::Text($camelName, $value);
-        $field->boost = 10;
-
-        break;
-
-      // DATES
-      case 'start_date':
-      case 'end_date':
-      case 'date':
-        $this->doc->addField(Zend_Search_Lucene_Field::Unstored($camelName, 
implode(' ', $this->getDates($name))));
-
-        break;
-
-      // TEXT fields
-      case 'scope_and_content':
-        $field = Zend_Search_Lucene_Field::Text($camelName, 
$this->__get($name));
-
-        break;
-
-      // KEYWORD fields (internal ids, slugs, etc.)
-      case 'culture':
-      case 'id':
-      case 'media_type_id':
-      case 'publication_status_id':
-      case 'slug':
-        if ($this->__isset($name))
-        {
-          $field = Zend_Search_Lucene_Field::Keyword($camelName, 
$this->__get($name));
-        }
-
-        break;
-
-      // UNSTORED fields
-      default:
-        if ($this->__isset($name))
-        {
-          $field = Zend_Search_Lucene_Field::Unstored($camelName, 
$this->__get($name));
-        }
-    }
-
-    if (isset($field))
-    {
-      $this->doc->addField($field);
-    }
-  }
-
   /**
    * Return an array of ancestors
    *
@@ -544,6 +273,26 @@
     return QubitPdo::fetchOne($sql, array($this->__get('id')));
   }
 
+  public function hasChildren()
+  {
+  }
+
+  public function getCreators()
+  {
+    $creators = array();
+
+    foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID)) as 
$item)
+    {
+      $creators[] = array(
+        'id' => $item->id,
+        'culture' => $item->culture,
+        'name' => $item->authorized_form_of_name,
+        'history' => $item->history
+      );
+    }
+    return $creators;
+  }
+
   public function getLevelOfDescription()
   {
     if (!isset(self::$lookups['levelOfDescription']))
@@ -559,7 +308,7 @@
     if 
(isset(self::$lookups['levelOfDescription'][$this->__get('level_of_description_id')]))
     {
       return 
self::$lookups['levelOfDescription'][$this->__get('level_of_description_id')]->getName(array(
-        'culture' => $this->__get('culture'), 
+        'culture' => $this->__get('culture'),
         'fallback' => true));
     }
   }
@@ -584,7 +333,7 @@
     if (isset(self::$lookups['mediaType'][$this->__get('media_type_id')]))
     {
       return 
self::$lookups['mediaType'][$this->__get('media_type_id')]->getName(array(
-        'culture' => $this->__get('culture'), 
+        'culture' => $this->__get('culture'),
         'fallback' => true));
     }
   }
@@ -645,6 +394,7 @@
                   act_slug.slug,
                   act_i18n.authorized_form_of_name,
                   act_i18n.history,
+                  act_i18n.culture,
                   i18n.date';
       $sql .= ' FROM '.QubitEvent::TABLE_NAME.' event';
       $sql .= ' JOIN '.QubitEventI18n::TABLE_NAME.' i18n
@@ -653,22 +403,22 @@
                   ON event.actor_id = act_i18n.id';
       $sql .= ' LEFT JOIN '.QubitSlug::TABLE_NAME.' act_slug
                   ON event.actor_id = act_slug.object_id';
-      $sql .= ' WHERE event.information_object_id = ?
-                  AND i18n.culture = ?
-                  AND (act_i18n.id IS NULL OR act_i18n.culture = ?)';
+      $sql .= ' WHERE event.information_object_id = ?';
+//                  AND i18n.culture = ?
+//                  AND (act_i18n.id IS NULL OR act_i18n.culture = ?)';
 
       self::$statements['event'] = self::$conn->prepare($sql);
     }
 
     self::$statements['event']->execute(array(
-      $this->__get('id'),
-      $this->__get('culture'),
-      $this->__get('culture')));
+      $this->__get('id')));
+//      $this->__get('culture'),
+//      $this->__get('culture')));
 
     return self::$statements['event']->fetchAll(PDO::FETCH_OBJ);
   }
 
-  protected function getDates($field)
+  public function getDates($field)
   {
     $dates = array();
 
@@ -699,11 +449,25 @@
           case 'array':
             if (isset($item->date) || isset($item->start_date) || 
isset($item->end_date))
             {
+                $rendered = Qubit::renderDateStartEnd($item->date, 
$item->start_date, $item->end_date);
+
+                // try to extract two 4-digit years
+                // NB: this doesn't work correctly on 3-date ranges, eg. 
1999-2000, 2001
+                preg_match("/^.*(?P<start>\d{4}).*(?P<end>\d{4}?).*/", 
$rendered, $matches);
+
+                $item->start_date = $matches['start'];
+                $item->end_date = $matches['end'];
+/*
+                if (isset($date->actor))
+                {
+                  $dateIndex['actor'] = $date->actor->__toString();
+                }
+*/
               $dates[] = array(
-                'date' => $item->date,
-                'start_date' => $item->start_date,
-                'end_date' => $item->end_date,
-                'type_id' => $item->type_id);
+//                'date' => $item->date,
+                'startDate' => $item->start_date,
+                'endDate' => $item->end_date,
+                'typeId' => $item->type_id);
             }
 
             break;
@@ -732,6 +496,8 @@
 
           $actor = new stdClass();
 
+          $actor->id = $item->actor_id;
+          $actor->culture = $item->culture;
           $actor->authorized_form_of_name = $item->authorized_form_of_name;
           $actor->slug = $item->slug;
           $actor->history = $item->history;
@@ -1043,4 +809,135 @@
       return implode(' ', $names);
     }
   }
-}
+
+    // Serialize yaself!  Don' disrespec yaself
+    public function serialize()
+    {
+      $serialized = array();
+
+      $serialized['slug'] = $this->slug;
+      $serialized['referenceCode'] = $this->getReferenceCode();
+      $serialized['identifier'] = $this->identifier;
+
+      $serialized['levelOfDescriptionId'] = $this->level_of_description_id;
+      $serialized['publicationStatusId'] = $this->publication_status_id;
+
+      // hierarchy information as arrays
+      $this->getAncestors();
+      $serialized['parentId'] = 
$this->ancestors[count($this->ancestors)-1]->id;
+
+      // NB: this will include the ROOT_ID
+      foreach ($this->ancestors as $ancestor)
+      {
+        $serialized['ancestors'][] = $ancestor->id;
+      }
+
+      if ($this->hasChildren())
+      {
+        // NB: this should be an ordered array
+        foreach ($this->getChildren() as $child)
+        {
+          $serialized['children'][] = $child->id;
+        }
+      }
+
+      // embed digital object information
+      if ($this->media_type_id)
+      {
+        $serialized['digitalObject']['mediaTypeId'] = $this->media_type_id;
+        $serialized['digitalObject']['thumbnail_FullPath'] = 
$this->getThumbnailPath();
+      }
+
+      $serialized['dates'] = $this->getDates('array');
+
+      // Repository (actor)
+      if ($repository = $this->getRepository(array('inherit' => 
empty($this->repositoryId))))
+      {
+        $repoI18ns = $repository->actorI18ns->indexBy('culture');
+        $serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(), 
$repoI18ns);
+
+        $serialized['repository'] = array('id' => $repository->id, 'i18n' => 
$serializedI18ns);
+      }
+
+      // Subject access points (terms)
+      foreach ($this->getSubjectAccessPoints() as $subject)
+      {
+        $term = $subject->getTerm();
+
+        $subjectI18ns = $term->termI18ns->indexBy('culture');
+        $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(), 
$subjectI18ns);
+
+        $serialized['subjects'][] = array('id' => $subject->id, 'i18n' => 
$serializedI18ns);
+      }
+
+      // Place access points (terms)
+      foreach ($this->getPlaceAccessPoints() as $place)
+      {
+        $term = $place->getTerm();
+
+        $placeI18ns = $term->termI18ns->indexBy('culture');
+        $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(), 
$placeI18ns);
+
+        $serialized['places'][] = array('id' => $place->id, 'i18n' => 
$serializedI18ns);
+
+      }
+
+      // Name access points (actors)
+      foreach ($this->getNameAccessPoints() as $name)
+      {
+        $nameI18ns = $name->object->actorI18ns->indexBy('culture');
+        $serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(), 
$nameI18ns);
+
+        $serialized['names'][] = array('id' => $name->object->id, 'i18n' => 
$serializedI18ns);
+      }
+
+      // Creators (actors)
+      foreach ($this->getCreators() as $creator)
+      {
+/*
+        $creatorI18ns = $creator->actorI18ns->indexBy('culture');
+        $serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(), 
$creatorI18ns);
+
+        $serialized['creators'][] = array('id' => $creator->id, 'i18n' => 
$serializedI18ns);
+*/
+        // FIXME: obviously this doesn't handle I18n properly
+        $serialized['creators'][] = array('id' => $creator['id'], 'i18n' => 
array(
+                                    array('authorizedFormOfName' => 
$creator['name'],
+                                          'history' => $creator['history'],
+                                          'culture' => $creator['culture'])
+        ));
+      }
+
+      // Notes
+      foreach ($this->getNotes() as $note)
+      {
+        $noteI18ns = $note->noteI18ns->indexBy('culture');
+        $serializedI18ns = QubitMapping::serializeI18ns(new QubitNote(), 
$noteI18ns);
+
+        $serialized['notes'][] = array('id' => $note->id, 'i18n' => 
$serializedI18ns);
+      }
+
+      $serialized['sourceCulture'] = $this->source_culture;
+
+      foreach(QubitMapping::getI18nFields('QubitInformationObject') as 
$camelName)
+      {
+        $fieldName = sfInflector::underscore($camelName);
+
+        if (!empty($this->data[$fieldName]))
+        {
+          $I18ns['culture'] = 'en'; // FIXME: OBVIOUSLY THIS IS A BAD HACK
+          $I18ns[lcfirst($camelName)] = $this->data[$fieldName];
+        }
+      }
+
+      $serialized['i18n'] = array($I18ns);
+//var_dump($serialized);
+//var_dump($this->data); exit;
+
+//      $thisI18ns = $this->informationObjectI18ns->indexBy('culture');
+//      $serialized['i18n'] = QubitMapping::serializeI18ns($this, $thisI18ns);
+
+      return $serialized;
+    }
+
+}
\ No newline at end of file

Modified: 
trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
==============================================================================
--- trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php     
Thu Mar 29 11:38:46 2012        (r11308)
+++ trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php     
Thu Mar 29 13:46:14 2012        (r11309)
@@ -39,10 +39,12 @@
 
   public $index = null;
 
-  /*
-   * Enable singleton creation via getInstance()
-   */
-  protected static $_instance;
+  // Enable singleton creation via getInstance()
+  protected static
+    $_instance,
+    $conn,
+    $statements,
+    $counter = 0;
 
   public static function getInstance()
   {
@@ -193,11 +195,8 @@
   {
     
sfContext::createInstance(sfProjectConfiguration::getApplicationConfiguration('qubit',
 'cli', true));
 
-    $start = microtime(true);
-    $this->logger->log('Populating index...', 'qtElasticSearch');
-
-    // if we are using an offset to resume from a segfault, optimize the index 
instead of deleting
-    if (!isset($options['actorOffset']) && !isset($options['ioOffset']) && 
!isset($options['termOffset']))
+    // if we are skipping existing objects, optimize the index instead of 
deleting
+    if (!isset($options['skip']))
     {
       $this->index->delete();
       $this->initialize();
@@ -211,120 +210,65 @@
     // set buffering and updates to be batched for better performance
     $this->enableBatch();
 
-    $termOffset = intval($options['termOffset']);
-    $actorOffset = intval($options['actorOffset']);
-    $ioOffset = intval($options['ioOffset']);
-    $repoOffset = intval($options['repoOffset']);
+    $this->timer = new QubitTimer;
+    $this->logger->log('Populating index...', 'qtElasticSearch');
     $total = 0;
 
-    // index terms
-    if (-1 < $termOffset)
-    {
-      $criteria = new Criteria;
-      $criteria->add(QubitTerm::ID, QubitTerm::ROOT_ID, Criteria::NOT_EQUAL);
-      $criteria->add(QubitTerm::TAXONOMY_ID, array(QubitTaxonomy::SUBJECT_ID, 
QubitTaxonomy::PLACE_ID), Criteria::IN);
+    // terms
+    $criteria = new Criteria;
+    $criteria->add(QubitTerm::ID, QubitTerm::ROOT_ID, Criteria::NOT_EQUAL);
+    $criteria->add(QubitTerm::TAXONOMY_ID, array(QubitTaxonomy::SUBJECT_ID, 
QubitTaxonomy::PLACE_ID), Criteria::IN);
 
-      if (0 < $termOffset)
-      {
-        $criteria->setOffset($termOffset);
-        $this->logger->log('Ignoring first '.$termOffset.' terms.', 
'qtElasticSearch');
-      }
+    $terms = QubitTerm::get($criteria);
+    $total = $total + count($terms);
 
-      $terms = QubitTerm::get($criteria);
-      $rowcount = count($terms) + $termOffset;
-      $total = $total + $rowcount;
+    foreach ($terms as $key => $term)
+    {
+      $this->save($term);
 
-      foreach ($terms as $key => $term)
+      if ($options['verbose'])
       {
-        $this->save($term);
-        $this->logger->log('"'.$term->__toString().'" inserted 
('.round(microtime(true) - $start, 2).'s) ('.($key + $termOffset + 
1).'/'.$rowcount.')', 'qtElasticSearch');
+        $this->logger->log('"'.$term->__toString().'" inserted 
('.$this->timer->elapsed().'s) ('.($key+1).'/'.count($terms).')', 
'qtElasticSearch::QubitTerm');
       }
     }
-    else
-    {
-      $this->logger->log('Terms are ignored.', 'qtElasticSearch');
-    }
 
-    // index actors
-    if (-1 < $actorOffset)
-    {
-      $criteria = new Criteria;
-      $criteria->add(QubitActor::ID, QubitActor::ROOT_ID, Criteria::NOT_EQUAL);
-      $criteria = QubitActor::addGetOnlyActorsCriteria($criteria);
+    // repositories
+    $criteria = new Criteria;
+    $criteria->add(QubitRepository::ID, QubitRepository::ROOT_ID, 
Criteria::NOT_EQUAL);
 
-      if (0 < $actorOffset)
-      {
-        $criteria->setOffset($actorOffset);
-        $this->logger->log('Ignoring first '.$actorOffset.' actors.', 
'qtElasticSearch');
-      }
+    $repositories = QubitRepository::get($criteria);
+    $total = $total + count($repositories);
 
-      $actors = QubitActor::get($criteria);
-      $rowcount = count($actors) + $actorOffset;
-      $total = $total + $rowcount;
+    foreach ($repositories as $key => $repository)
+    {
+      $this->save($repository);
 
-      foreach ($actors as $key => $actor)
+      if ($options['verbose'])
       {
-        $this->save($actor);
-        $this->logger->log('"'.$actor->__toString().'" inserted 
('.round(microtime(true) - $start, 2).'s) ('.($key + $actorOffset + 
1).'/'.$rowcount.')', 'qtElasticSearch');
+        $this->logger->log('"'.$repository->__toString().'" inserted 
('.$this->timer->elapsed().'s) ('.($key+1).'/'.count($repositories).')', 
'qtElasticSearch::QubitRepository');
       }
     }
-    else
-    {
-      $this->logger->log('Actors are ignored.', 'qtElasticSearch');
-    }
-
-    // index repositories
-    if (-1 < $repoOffset)
-    {
-      $criteria = new Criteria;
-      $criteria->add(QubitRepository::ID, QubitRepository::ROOT_ID, 
Criteria::NOT_EQUAL);
 
-      if (0 < $repoOffset)
-      {
-        $criteria->setOffset($repoOffset);
-        $this->logger->log('Ignoring first '.$repoOffset.' repositories.', 
'qtElasticSearch');
-      }
+    // information objects
+    $total = $total + $this->populateInformationObjects($options);
 
-      $repositories = QubitRepository::get($criteria);
-      $rowcount = count($repositories) + $repoOffset;
-      $total = $total + $rowcount;
+    // index actors
+    // FIXME: replicate IO PDO mechanism to index actors
+    $criteria = new Criteria;
+    $criteria->add(QubitActor::ID, QubitActor::ROOT_ID, Criteria::NOT_EQUAL);
+    $criteria = QubitActor::addGetOnlyActorsCriteria($criteria);
 
-      foreach ($repositories as $key => $repository)
-      {
-        $this->save($repository);
-        $this->logger->log('"'.$repository->__toString().'" inserted 
('.round(microtime(true) - $start, 2).'s) ('.($key + $repoOffset + 
1).'/'.$rowcount.')', 'qtElasticSearch');
-      }
-    }
-    else
-    {
-      $this->logger->log('Repositories are ignored.', 'qtElasticSearch');
-    }
+    $actors = QubitActor::get($criteria);
+    $total = $total + count($actors);
 
-    // index information objects
-    if (-1 < $ioOffset)
+    foreach ($actors as $key => $actor)
     {
-      $criteria = new Criteria;
-      $criteria->add(QubitInformationObject::ID, 
QubitInformationObject::ROOT_ID, Criteria::NOT_EQUAL);
+      $this->save($actor);
 
-      if (0 < $ioOffset)
+      if ($options['verbose'])
       {
-        $criteria->setOffset($ioOffset);
-        $this->logger->log('Ignoring first '.$ioOffset.' information 
objects.', 'qtElasticSearch');
+        $this->logger->log('"'.$actor->__toString().'" inserted 
('.$this->timer->elapsed().'s) ('.($key+1).'/'.count($actors).')', 
'qtElasticSearch::QubitActor');
       }
-
-      $informationObjects = QubitInformationObject::get($criteria);
-      $rowcount = count($informationObjects) + $ioOffset;
-      $total = $total + $rowcount;
-
-      foreach ($informationObjects as $key => $informationObject)
-      {
-        $this->save($informationObject);
-        $this->logger->log('"'.$informationObject->__toString().'" inserted 
('.round(microtime(true) - $start, 2).'s) ('.($key + $ioOffset + 
1).'/'.$rowcount.')', 'qtElasticSearch');
-      }
-    }
-    else
-    {
-      $this->logger->log('Information objects are ignored.', 
'qtElasticSearch');
     }
 
     // if there are still documents in the batch queue, send them
@@ -335,7 +279,7 @@
       $this->batchDocs = array();
     }
 
-    $this->logger->log('Index populated with "'.($total).'" documents in 
"'.round(microtime(true) - $start, 2).'" seconds.', 'qtElasticSearch');
+    $this->logger->log('Index populated with "'.($total).'" documents in 
"'.$this->timer->elapsed().'" seconds.', 'qtElasticSearch');
   }
 
   /*
@@ -356,4 +300,133 @@
     }
   }
 
+  /*
+   * PORTED FROM QUBITSEARCH CLASS
+   */
+
+  /**
+   * Index every information object found below the root node.
+   *
+   * Counts the rows whose id is greater than the root id (the figure is only
+   * used for progress output and as the return value), then starts the
+   * recursive walk at the root node.
+   *
+   * @param array $options passed through unchanged to
+   *                       recursivelyAddInformationObjects()
+   *
+   * @return mixed the row count as returned by QubitPdo::fetchColumn()
+   */
+  public function populateInformationObjects($options = array())
+  {
+    // Obtain the shared connection on first use only
+    if (!isset(self::$conn))
+    {
+      self::$conn = Propel::getConnection();
+    }
+
+    $rootId = QubitInformationObject::ROOT_ID;
+
+    // Number of rows with an id above the root id
+    $countSql = 'SELECT COUNT(*) FROM '.QubitInformationObject::TABLE_NAME.' WHERE id > ?';
+    $totalRows = QubitPdo::fetchColumn($countSql, array($rootId));
+
+    // Walk the hierarchy starting with the children of the root
+    $this->recursivelyAddInformationObjects($rootId, $totalRows, $options);
+
+    return $totalRows;
+  }
+
+  /**
+   * Index the children of one information object, then descend into every
+   * child that has descendants of its own.
+   *
+   * Children are read in "lft" order through a prepared statement that is
+   * cached in self::$statements and shared by every level of the recursion.
+   * Documents are queued in $this->batchDocs and sent in bulk whenever the
+   * queue reaches $this->batchSize; a partially filled queue is left for the
+   * caller to send.
+   *
+   * @param integer $parentId  id of the information object whose children
+   *                           are indexed
+   * @param integer $totalRows expected total, only used in the progress
+   *                           message
+   * @param array   $options   'verbose' => true logs one line per indexed
+   *                           object; the flag is handed down to nested calls
+   */
+  public function recursivelyAddInformationObjects($parentId, $totalRows, $options = array())
+  {
+    // 'verbose' is optional. Reading it unguarded raised a notice on every
+    // nested call, because the recursion used to pass an options array that
+    // did not contain the key (which also switched logging off below the
+    // first level).
+    $verbose = !empty($options['verbose']);
+
+    // Allow direct calls that did not go through populateInformationObjects()
+    if (!isset(self::$conn))
+    {
+      self::$conn = Propel::getConnection();
+    }
+
+    // Prepare the child query once; later calls reuse the cached statement
+    if (!isset(self::$statements['getChildren']))
+    {
+      // NOTE(review): the JOIN appears to yield one row per i18n culture, so
+      // an object with several translations would be indexed once per
+      // culture -- confirm; the commit log lists full I18n as a known issue.
+      $sql  = 'SELECT io.id, io.lft, io.rgt, i18n.culture, i18n.title';
+      $sql .= ' FROM '.QubitInformationObject::TABLE_NAME.' io';
+      $sql .= ' JOIN '.QubitInformationObjectI18n::TABLE_NAME.' i18n ON io.id = i18n.id';
+      $sql .= ' WHERE io.parent_id = ?';
+      $sql .= ' ORDER BY io.lft';
+
+      self::$statements['getChildren'] = self::$conn->prepare($sql);
+    }
+
+    $statement = self::$statements['getChildren'];
+    $statement->execute(array($parentId));
+
+    // Read all rows before looping: the recursive call below re-executes
+    // this same statement, so the rows must not be fetched lazily
+    $children = $statement->fetchAll(PDO::FETCH_OBJ);
+
+    foreach ($children as $item)
+    {
+      // 9266 IOs in 12.3s (750 /s)
+      $object = new QubitPdoInformationObject($item->id);
+
+      // empty: 9266 IOs in 78.0s (119 /s) --> 58.8 w/o logging (158/s)
+      // full: 9266 IOs in 209s (44/s)
+      $serialized = $object->serialize();
+
+      // Debugging aid (disabled): compare against the Propel-based serializer
+      //
+      //   if ($this->array_compare($this->serialize(QubitInformationObject::getById($item->id)), $serialized))
+      //   {
+      //     // WARNING: PDO object is not serialized correctly
+      //   }
+
+      // 9266 IOs in 221s (42/s)
+      $document = new Elastica_Document($object->id, $serialized);
+      $document->setType('QubitInformationObject');
+
+      // add this document to the batch queue
+      $this->batchDocs[] = $document;
+
+      // if we have a full batch, send in bulk
+      if (count($this->batchDocs) >= $this->batchSize)
+      {
+        $this->index->addDocuments($this->batchDocs);
+        $this->index->refresh();
+
+        $this->batchDocs = array();
+      }
+
+      // Log it
+      self::$counter++;
+
+      if ($verbose)
+      {
+        $this->logger->log('"'.$item->title.'" inserted ('.$this->timer->elapsed().'s) ('.self::$counter.'/'.$totalRows.')', 'qtElasticSearch::QubitInformationObject');
+      }
+
+      // Descend hierarchy: a gap wider than 1 between lft and rgt means the
+      // node has at least one descendant
+      if (1 < ($item->rgt - $item->lft))
+      {
+        // Pass ancestors and repository down to descendants, and keep the
+        // caller's verbosity
+        $this->recursivelyAddInformationObjects($item->id, $totalRows, array(
+          'verbose'    => $verbose,
+          'ancestors'  => array_merge($object->getAncestors(), array($object)),
+          'repository' => $object->getRepository()));
+      }
+    }
+  }
+
+  /**
+   * Recursively compare two arrays, using strict (!==) comparison for
+   * non-array values.
+   *
+   * @param array $array1 left-hand array
+   * @param array $array2 right-hand array
+   *
+   * @return array|false false when no difference is found; otherwise an
+   *   array in which [0] holds the entries of $array1 that are missing from,
+   *   or differ in, $array2 and [1] holds the entries of $array2 that are
+   *   missing from, or differ in, $array1 (either index may be absent)
+   */
+  public function array_compare($array1, $array2)
+  {
+    // Start from a real array: writing $diff[0][...] into a variable that
+    // holds false relies on autovivification, deprecated since PHP 8.1
+    $diff = array();
+
+    // Left-to-right
+    foreach ($array1 as $key => $value)
+    {
+      if (!array_key_exists($key, $array2))
+      {
+        $diff[0][$key] = $value;
+      }
+      elseif (is_array($value))
+      {
+        if (!is_array($array2[$key]))
+        {
+          $diff[0][$key] = $value;
+          $diff[1][$key] = $array2[$key];
+        }
+        else
+        {
+          $new = $this->array_compare($value, $array2[$key]);
+
+          if (false !== $new)
+          {
+            if (isset($new[0]))
+            {
+              $diff[0][$key] = $new[0];
+            }
+
+            if (isset($new[1]))
+            {
+              $diff[1][$key] = $new[1];
+            }
+          }
+        }
+      }
+      elseif ($array2[$key] !== $value)
+      {
+        $diff[0][$key] = $value;
+        $diff[1][$key] = $array2[$key];
+      }
+    }
+
+    // Right-to-left: only keys missing on the left need handling here.
+    // No direct comparison because matching keys were already compared,
+    // recursively, in the left-to-right loop above.
+    foreach ($array2 as $key => $value)
+    {
+      if (!array_key_exists($key, $array1))
+      {
+        $diff[1][$key] = $value;
+      }
+    }
+
+    // Keep the original contract: false means "no differences"
+    return empty($diff) ? false : $diff;
+  }
 }
\ No newline at end of file

Modified: trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php
==============================================================================
--- trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php      Thu Mar 
29 11:38:46 2012        (r11308)
+++ trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php      Thu Mar 
29 13:46:14 2012        (r11309)
@@ -28,6 +28,7 @@
       new sfCommandOption('application', null, 
sfCommandOption::PARAMETER_OPTIONAL, 'The application name', 'qubit'),
       new sfCommandOption('env', null, sfCommandOption::PARAMETER_REQUIRED, 
'The environment', 'cli'),
 //      new sfCommandOption('optimize', 'o', sfCommandOption::PARAMETER_NONE, 
'If passed, the index is optimized after population'),
+      new sfCommandOption('verbose', 'v', sfCommandOption::PARAMETER_NONE, 'If 
passed, progress is displayed for each object indexed'),
       new sfCommandOption('skip', 'k', sfCommandOption::PARAMETER_REQUIRED, 
'Only skip "actors" or "io" (information objects)')));
 
     $this->namespace = 'search';

-- 
You received this message because you are subscribed to the Google Groups 
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/qubit-commits?hl=en.

Reply via email to