Author: david
Date: Fri Feb 10 20:42:37 2012
New Revision: 10859

Log:
Do populate hierarchically to allow optimizing for inherited properties 
(repository, collectionRoot, referenceCode)

Modified:
   trunk/lib/QubitSearchPdo.class.php
   trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php

Modified: trunk/lib/QubitSearchPdo.class.php
==============================================================================
--- trunk/lib/QubitSearchPdo.class.php  Fri Feb 10 14:44:00 2012        (r10858)
+++ trunk/lib/QubitSearchPdo.class.php  Fri Feb 10 20:42:37 2012        (r10859)
@@ -19,10 +19,15 @@
 
 class QubitSearchPdo extends QubitSearch
 {
+  protected
+    $counter;
+
   protected static
-    $collectionRoot,
-    $levelsOfDescription = array(),
-    $repository;
+    $conn,
+    $infoObjectCount,
+    $levelsOfDescription,
+    $startTime,
+    $statement;
 
   public function qubitPopulate($options)
   {
@@ -33,9 +38,9 @@
       return;
     }
 
-    $conn = Propel::getConnection();
+    self::$conn = Propel::getConnection();
 
-    $start = microtime(true);
+    self::$startTime = microtime(true);
     $this->getLogger()->log('Populating index...', $this->getName());
 
     // if we are using an offset to resume from a segfault, optimize the index 
instead of deleting
@@ -61,7 +66,7 @@
     {
       // Get count of all actors
       $sql = 'SELECT COUNT(*) from '.QubitActor::TABLE_NAME;
-      $rs = $conn->query($sql);
+      $rs = self::$conn->query($sql);
       $rowcount = $rs->fetchColumn(0);
 
       // Get actors (with offset)
@@ -85,7 +90,7 @@
 
         self::addActorIndex($actor);
 
-        $this->getLogger()->log('"'.$actor->__toString().'" inserted 
('.round(microtime(true) - $start, 2).'s) ('.($key + $actorOffset + 
1).'/'.$rowcount.')', $this->getName());
+        $this->getLogger()->log('"'.$actor->__toString().'" inserted 
('.round(microtime(true) - self::$startTime, 2).'s) ('.($key + $actorOffset + 
1).'/'.$rowcount.')', $this->getName());
       }
     }
     else
@@ -98,18 +103,36 @@
     {
       // Get count of all information objects
       $sql = 'SELECT COUNT(*) FROM '.QubitInformationObject::TABLE_NAME;
-      $rs = $conn->query($sql);
-      $rowcount = $rs->fetchColumn(0);
+      $rs = self::$conn->query($sql);
+      self::$infoObjectCount = $rs->fetchColumn(0);
 
       $offset = 0;
       if (0 < intval($ioOffset))
       {
-        $offset = intval($ioOffset);
+        $options['ioOffset'] = intval($ioOffset);
       }
 
-      // Temp limit
-      $limit = 10000;
+      if ($i == 0 && 0 < $options['ioOffset'])
+      {
+        //$this->getLogger()->log("Ignoring first $offset information 
objects.");
+      }
 
+      $this->counter = 0;
+      $this->indexInformationObjects(array(), $options);
+    }
+    else
+    {
+      $this->getLogger()->log('Information objects are ignored.');
+    }
+
+    $this->getLogger()->log('Index populated in "'.round(microtime(true) - 
self::$startTime, 2).'" seconds.', $this->getName());
+  }
+
+  public function indexInformationObjects($ancestors, $options = array())
+  {
+    // Cache the select statement
+    if (!isset(self::$statement))
+    {
       // Get info objects (with offset)
       $sql  = 'SELECT
                  io.*,
@@ -117,66 +140,74 @@
                  slug.slug,
                  pubs.status_id as publication_status_id';
       $sql .= ' FROM '.QubitInformationObject::TABLE_NAME.' io';
-      $sql .= ' JOIN '.QubitInformationObjectI18n::TABLE_NAME.' i18n';
-      $sql .= '   ON io.id = i18n.id';
-      $sql .= ' JOIN '.QubitSlug::TABLE_NAME.' slug';
-      $sql .= '   ON io.id = slug.object_id';
-      $sql .= ' JOIN '.QubitStatus::TABLE_NAME.' pubs';
-      $sql .= '   ON io.id = pubs.object_id';
-      $sql .= ' WHERE
-                  pubs.type_id = :pub_status_type_id';
+      $sql .= ' JOIN '.QubitInformationObjectI18n::TABLE_NAME.' i18n
+                  ON io.id = i18n.id';
+      $sql .= ' JOIN '.QubitSlug::TABLE_NAME.' slug
+                  ON io.id = slug.object_id';
+      $sql .= ' JOIN '.QubitStatus::TABLE_NAME.' pubs
+                  ON io.id = pubs.object_id';
+      $sql .= ' WHERE io.parent_id = :parentId
+                  AND pubs.type_id = :pubStatusTypeId';
       $sql .= ' ORDER BY io.lft';
-      $sql .= ' LIMIT '.$offset.', '.$limit;
+      //$sql .= ' LIMIT '.$offset.', '.$limit;
+
+      self::$statement = self::$conn->prepare($sql);
+    }
+
+    if (0 < count($ancestors))
+    {
+      $parentId = $ancestors[count($ancestors)-1]->id;
+    }
+    else
+    {
+      $parentId = QubitInformationObject::ROOT_ID;
+    }
 
-      $statement = $conn->prepare($sql);
-      $statement->execute(array(
-        ':pub_status_type_id' => QubitTerm::STATUS_TYPE_PUBLICATION_ID));
+    self::$statement->execute(array(
+      ':parentId' => $parentId,
+      ':pubStatusTypeId' => QubitTerm::STATUS_TYPE_PUBLICATION_ID));
 
-      if ($i == 0 && 0 < $offset)
+    foreach (self::$statement->fetchAll(PDO::FETCH_OBJ) as $i => $resource)
+    {
+      $this->counter++;
+
+      // Set repository
+      if (null != $resource->repository_id)
       {
-        $this->getLogger()->log("Ignoring first $offset information objects.");
+        $options['repository'] = 
QubitRepository::getById($resource->repository_id);
       }
 
-      // Loop through results, and add to search index
-      $i = 0;
-      while ($resource = $statement->fetch(PDO::FETCH_OBJ))
-      {
-        self::addInformationObjectIndex($resource, $resource->culture, 
$options);
+      // Add to index
+      self::addInformationObjectIndex($resource, $ancestors, $options);
+
+      // Log
+      $this->getLogger()->log('"'.$resource->title.'" inserted 
('.round(microtime(true) - self::$startTime, 2).'s) 
('.($this->counter).'/'.self::$infoObjectCount.')', $this->getName());
 
-        $this->getLogger()->log('"'.$resource->title.'" inserted 
('.round(microtime(true) - $start, 2).'s) ('.($i + $offset + 
1).'/'.$rowcount.')', $this->getName());
 
-        $i++;
+      // If not a leaf, then descend tree
+      if ((1 < intval($resource->rgt) - intval($resource->lft))
+        && $resource->culture == $resource->source_culture)
+      {
+        $this->indexInformationObjects(array_merge($ancestors, 
array($resource)), $options);
       }
     }
-    else
-    {
-      $this->getLogger()->log('Information objects are ignored.');
-    }
-
-    $this->getLogger()->log('Index populated in "'.round(microtime(true) - 
$start, 2).'" seconds.', $this->getName());
   }
 
-  public static function addInformationObjectIndex($resource, $language, 
$options = array())
+  public static function addInformationObjectIndex($resource, $ancestors, 
$options = array())
   {
-    // Only ROOT node should have no parent, don't index
-    if (null === $resource->parent_id)
-    {
-      return;
-    }
-
     $doc = new Zend_Search_Lucene_Document;
 
     // Reference elements
     $doc->addField(Zend_Search_Lucene_Field::Keyword('id', $resource->id));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('slug', $resource->slug));
-    $doc->addField(Zend_Search_Lucene_Field::Keyword('culture', $language));
+    $doc->addField(Zend_Search_Lucene_Field::Keyword('culture', 
$resource->culture));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('className', 
'QubitInformationObject'));
 
     $doc->addField(Zend_Search_Lucene_Field::UnIndexed('parent_id', 
$resource->parent_id));
     $doc->addField(Zend_Search_Lucene_Field::Keyword('parent', 
$resource->parent->slug));
 
     // Identifier
-    $identifierField = Zend_Search_Lucene_Field::Text('identifier', 
$resource->identifier());
+    $identifierField = Zend_Search_Lucene_Field::Text('identifier', 
$resource->identifier);
     $identifierField->boost = 5;
     $doc->addField($identifierField);
 
@@ -219,45 +250,25 @@
     $doc->addField(Zend_Search_Lucene_Field::Unstored('locationofcopies', 
$resource->location_of_copies));
     
$doc->addField(Zend_Search_Lucene_Field::Unstored('relatedunitsofdescription', 
$resource->related_units_of_description));
 
-    // Set collection root
-    if (QubitInformationObject::ROOT_ID == $resource->parent_id &&
-       (!isset(self::$collectionRoot) || self::$collectionRoot->id != 
$resource->id))
-    {
-      self::$collectionRoot = $resource;
-    }
-
-    if (isset(self::$collectionRoot))
-    {
-      $doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootId', 
self::$collectionRoot->id));
-      $doc->addField(Zend_Search_Lucene_Field::Keyword('collectionRootSlug', 
self::$collectionRoot->slug));
-
-      // Do fallback if current culture is not the same as the collectionRoot
-      $crTitle = self::$collectionRoot->title;
-      if (0 == strlen($crTitle) || self::$collectionRoot->culture != $language)
-      {
-        $crTitle = self::getFallbackTitle(self::$collectionRoot->id, 
array('culture' => $language));
-      }
-
-      
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootTitle', 
$crTitle));
-    }
-    else
-    {
-      throw new sfException('No collection root for "'.$resource->title.'"');
-    }
-
-    // Set repository
-    if (null != $resource->repository_id &&
-      (!isset(self::$repository) || self::$repository->id != 
$resource->repository_id))
+    // Collection root
+    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootId', 
$ancestors[0]->id));
+    $doc->addField(Zend_Search_Lucene_Field::Keyword('collectionRootSlug', 
$ancestors[0]->slug));
+
+    // Do fallback if current culture is not the same as the collectionRoot
+    $collectionRoot = (0 < count($ancestors)) ?  $ancestors[0] : $resource;
+    $crTitle = $collectionRooot->title;
+    if (0 == strlen($crTitle) || $collectionRoot->culture != 
$resource->culture)
     {
-      self::$repository = QubitRepository::getById($resource->repository_id);
+      $crTitle = self::getFallbackTitle($collectionRoot->id, array('culture' 
=> $resource->culture));
     }
+    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootTitle', 
$crTitle));
 
     // Add repository
-    if (isset(self::$repository))
+    if (isset($options['repository']))
     {
-      $doc->addField(Zend_Search_Lucene_Field::Keyword('repositoryId', 
self::$repository->id));
-      $doc->addField(Zend_Search_Lucene_Field::Keyword('repositorySlug', 
self::$repository->slug));
-      $doc->addField(Zend_Search_Lucene_Field::Text('repositoryName', 
self::$repository->getAuthorizedFormOfName(array('culture' => $language))));
+      $doc->addField(Zend_Search_Lucene_Field::Keyword('repositoryId', 
$options['repository']->id));
+      $doc->addField(Zend_Search_Lucene_Field::Keyword('repositorySlug', 
$options['repository']->slug));
+      $doc->addField(Zend_Search_Lucene_Field::Text('repositoryName', 
$options['repository']->getAuthorizedFormOfName(array('culture' => 
$language))));
     }
     else
     {
@@ -403,7 +414,6 @@
   protected static function getFallbackTitle($id, $options = array())
   {
     $params = array($id);
-    $conn = Propel::getConnection();
 
     // Get info objects (with offset)
     $sql  = 'SELECT i18n.title';
@@ -422,7 +432,7 @@
       $sql .= ' AND node.source_culture = i18n.culture';
     }
 
-    $statement = $conn->prepare($sql);
+    $statement = self::$conn->prepare($sql);
     $statement->execute($params);
     $result = $statement->fetch(PDO::FETCH_OBJ);
 

Modified: trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php
==============================================================================
--- trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php      Fri Feb 
10 14:44:00 2012        (r10858)
+++ trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php      Fri Feb 
10 20:42:37 2012        (r10859)
@@ -64,6 +64,6 @@
     $index = new $index;
     $index->setLogger(new xfLoggerTask($this->dispatcher, $this->formatter));
     $index->qubitPopulate($options);
-    $index->optimize();
+   // $index->optimize();
   }
 }

-- 
You received this message because you are subscribed to the Google Groups 
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/qubit-commits?hl=en.

Reply via email to