Author: david
Date: Fri Feb 10 20:42:37 2012
New Revision: 10859
Log:
Populate the index hierarchically to allow optimizing for inherited properties
(repository, collectionRoot, referenceCode)
Modified:
trunk/lib/QubitSearchPdo.class.php
trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php
Modified: trunk/lib/QubitSearchPdo.class.php
==============================================================================
--- trunk/lib/QubitSearchPdo.class.php Fri Feb 10 14:44:00 2012 (r10858)
+++ trunk/lib/QubitSearchPdo.class.php Fri Feb 10 20:42:37 2012 (r10859)
@@ -19,10 +19,15 @@
class QubitSearchPdo extends QubitSearch
{
+ protected
+ $counter;
+
protected static
- $collectionRoot,
- $levelsOfDescription = array(),
- $repository;
+ $conn,
+ $infoObjectCount,
+ $levelsOfDescription,
+ $startTime,
+ $statement;
public function qubitPopulate($options)
{
@@ -33,9 +38,9 @@
return;
}
- $conn = Propel::getConnection();
+ self::$conn = Propel::getConnection();
- $start = microtime(true);
+ self::$startTime = microtime(true);
$this->getLogger()->log('Populating index...', $this->getName());
// if we are using an offset to resume from a segfault, optimize the index
instead of deleting
@@ -61,7 +66,7 @@
{
// Get count of all actors
$sql = 'SELECT COUNT(*) from '.QubitActor::TABLE_NAME;
- $rs = $conn->query($sql);
+ $rs = self::$conn->query($sql);
$rowcount = $rs->fetchColumn(0);
// Get actors (with offset)
@@ -85,7 +90,7 @@
self::addActorIndex($actor);
- $this->getLogger()->log('"'.$actor->__toString().'" inserted
('.round(microtime(true) - $start, 2).'s) ('.($key + $actorOffset +
1).'/'.$rowcount.')', $this->getName());
+ $this->getLogger()->log('"'.$actor->__toString().'" inserted
('.round(microtime(true) - self::$startTime, 2).'s) ('.($key + $actorOffset +
1).'/'.$rowcount.')', $this->getName());
}
}
else
@@ -98,18 +103,36 @@
{
// Get count of all information objects
$sql = 'SELECT COUNT(*) FROM '.QubitInformationObject::TABLE_NAME;
- $rs = $conn->query($sql);
- $rowcount = $rs->fetchColumn(0);
+ $rs = self::$conn->query($sql);
+ self::$infoObjectCount = $rs->fetchColumn(0);
$offset = 0;
if (0 < intval($ioOffset))
{
- $offset = intval($ioOffset);
+ $options['ioOffset'] = intval($ioOffset);
}
- // Temp limit
- $limit = 10000;
+ if ($i == 0 && 0 < $options['ioOffset'])
+ {
+ //$this->getLogger()->log("Ignoring first $offset information
objects.");
+ }
+ $this->counter = 0;
+ $this->indexInformationObjects(array(), $options);
+ }
+ else
+ {
+ $this->getLogger()->log('Information objects are ignored.');
+ }
+
+ $this->getLogger()->log('Index populated in "'.round(microtime(true) -
self::$startTime, 2).'" seconds.', $this->getName());
+ }
+
+ public function indexInformationObjects($ancestors, $options = array())
+ {
+ // Cache the select statement
+ if (!isset(self::$statement))
+ {
// Get info objects (with offset)
$sql = 'SELECT
io.*,
@@ -117,66 +140,74 @@
slug.slug,
pubs.status_id as publication_status_id';
$sql .= ' FROM '.QubitInformationObject::TABLE_NAME.' io';
- $sql .= ' JOIN '.QubitInformationObjectI18n::TABLE_NAME.' i18n';
- $sql .= ' ON io.id = i18n.id';
- $sql .= ' JOIN '.QubitSlug::TABLE_NAME.' slug';
- $sql .= ' ON io.id = slug.object_id';
- $sql .= ' JOIN '.QubitStatus::TABLE_NAME.' pubs';
- $sql .= ' ON io.id = pubs.object_id';
- $sql .= ' WHERE
- pubs.type_id = :pub_status_type_id';
+ $sql .= ' JOIN '.QubitInformationObjectI18n::TABLE_NAME.' i18n
+ ON io.id = i18n.id';
+ $sql .= ' JOIN '.QubitSlug::TABLE_NAME.' slug
+ ON io.id = slug.object_id';
+ $sql .= ' JOIN '.QubitStatus::TABLE_NAME.' pubs
+ ON io.id = pubs.object_id';
+ $sql .= ' WHERE io.parent_id = :parentId
+ AND pubs.type_id = :pubStatusTypeId';
$sql .= ' ORDER BY io.lft';
- $sql .= ' LIMIT '.$offset.', '.$limit;
+ //$sql .= ' LIMIT '.$offset.', '.$limit;
+
+ self::$statement = self::$conn->prepare($sql);
+ }
+
+ if (0 < count($ancestors))
+ {
+ $parentId = $ancestors[count($ancestors)-1]->id;
+ }
+ else
+ {
+ $parentId = QubitInformationObject::ROOT_ID;
+ }
- $statement = $conn->prepare($sql);
- $statement->execute(array(
- ':pub_status_type_id' => QubitTerm::STATUS_TYPE_PUBLICATION_ID));
+ self::$statement->execute(array(
+ ':parentId' => $parentId,
+ ':pubStatusTypeId' => QubitTerm::STATUS_TYPE_PUBLICATION_ID));
- if ($i == 0 && 0 < $offset)
+ foreach (self::$statement->fetchAll(PDO::FETCH_OBJ) as $i => $resource)
+ {
+ $this->counter++;
+
+ // Set repository
+ if (null != $resource->repository_id)
{
- $this->getLogger()->log("Ignoring first $offset information objects.");
+ $options['repository'] =
QubitRepository::getById($resource->repository_id);
}
- // Loop through results, and add to search index
- $i = 0;
- while ($resource = $statement->fetch(PDO::FETCH_OBJ))
- {
- self::addInformationObjectIndex($resource, $resource->culture,
$options);
+ // Add to index
+ self::addInformationObjectIndex($resource, $ancestors, $options);
+
+ // Log
+ $this->getLogger()->log('"'.$resource->title.'" inserted
('.round(microtime(true) - self::$startTime, 2).'s)
('.($this->counter).'/'.self::$infoObjectCount.')', $this->getName());
- $this->getLogger()->log('"'.$resource->title.'" inserted
('.round(microtime(true) - $start, 2).'s) ('.($i + $offset +
1).'/'.$rowcount.')', $this->getName());
- $i++;
+ // If not a leaf, then descend tree
+ if ((1 < intval($resource->rgt) - intval($resource->lft))
+ && $resource->culture == $resource->source_culture)
+ {
+ $this->indexInformationObjects(array_merge($ancestors,
array($resource)), $options);
}
}
- else
- {
- $this->getLogger()->log('Information objects are ignored.');
- }
-
- $this->getLogger()->log('Index populated in "'.round(microtime(true) -
$start, 2).'" seconds.', $this->getName());
}
- public static function addInformationObjectIndex($resource, $language,
$options = array())
+ public static function addInformationObjectIndex($resource, $ancestors,
$options = array())
{
- // Only ROOT node should have no parent, don't index
- if (null === $resource->parent_id)
- {
- return;
- }
-
$doc = new Zend_Search_Lucene_Document;
// Reference elements
$doc->addField(Zend_Search_Lucene_Field::Keyword('id', $resource->id));
$doc->addField(Zend_Search_Lucene_Field::Keyword('slug', $resource->slug));
- $doc->addField(Zend_Search_Lucene_Field::Keyword('culture', $language));
+ $doc->addField(Zend_Search_Lucene_Field::Keyword('culture',
$resource->culture));
$doc->addField(Zend_Search_Lucene_Field::Keyword('className',
'QubitInformationObject'));
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('parent_id',
$resource->parent_id));
$doc->addField(Zend_Search_Lucene_Field::Keyword('parent',
$resource->parent->slug));
// Identifier
- $identifierField = Zend_Search_Lucene_Field::Text('identifier',
$resource->identifier());
+ $identifierField = Zend_Search_Lucene_Field::Text('identifier',
$resource->identifier);
$identifierField->boost = 5;
$doc->addField($identifierField);
@@ -219,45 +250,25 @@
$doc->addField(Zend_Search_Lucene_Field::Unstored('locationofcopies',
$resource->location_of_copies));
$doc->addField(Zend_Search_Lucene_Field::Unstored('relatedunitsofdescription',
$resource->related_units_of_description));
- // Set collection root
- if (QubitInformationObject::ROOT_ID == $resource->parent_id &&
- (!isset(self::$collectionRoot) || self::$collectionRoot->id !=
$resource->id))
- {
- self::$collectionRoot = $resource;
- }
-
- if (isset(self::$collectionRoot))
- {
- $doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootId',
self::$collectionRoot->id));
- $doc->addField(Zend_Search_Lucene_Field::Keyword('collectionRootSlug',
self::$collectionRoot->slug));
-
- // Do fallback if current culture is not the same as the collectionRoot
- $crTitle = self::$collectionRoot->title;
- if (0 == strlen($crTitle) || self::$collectionRoot->culture != $language)
- {
- $crTitle = self::getFallbackTitle(self::$collectionRoot->id,
array('culture' => $language));
- }
-
-
$doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootTitle',
$crTitle));
- }
- else
- {
- throw new sfException('No collection root for "'.$resource->title.'"');
- }
-
- // Set repository
- if (null != $resource->repository_id &&
- (!isset(self::$repository) || self::$repository->id !=
$resource->repository_id))
+ // Collection root
+ $doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootId',
$ancestors[0]->id));
+ $doc->addField(Zend_Search_Lucene_Field::Keyword('collectionRootSlug',
$ancestors[0]->slug));
+
+ // Do fallback if current culture is not the same as the collectionRoot
+ $collectionRoot = (0 < count($ancestors)) ? $ancestors[0] : $resource;
+ $crTitle = $collectionRooot->title;
+ if (0 == strlen($crTitle) || $collectionRoot->culture !=
$resource->culture)
{
- self::$repository = QubitRepository::getById($resource->repository_id);
+ $crTitle = self::getFallbackTitle($collectionRoot->id, array('culture'
=> $resource->culture));
}
+ $doc->addField(Zend_Search_Lucene_Field::UnIndexed('collectionRootTitle',
$crTitle));
// Add repository
- if (isset(self::$repository))
+ if (isset($options['repository']))
{
- $doc->addField(Zend_Search_Lucene_Field::Keyword('repositoryId',
self::$repository->id));
- $doc->addField(Zend_Search_Lucene_Field::Keyword('repositorySlug',
self::$repository->slug));
- $doc->addField(Zend_Search_Lucene_Field::Text('repositoryName',
self::$repository->getAuthorizedFormOfName(array('culture' => $language))));
+ $doc->addField(Zend_Search_Lucene_Field::Keyword('repositoryId',
$options['repository']->id));
+ $doc->addField(Zend_Search_Lucene_Field::Keyword('repositorySlug',
$options['repository']->slug));
+ $doc->addField(Zend_Search_Lucene_Field::Text('repositoryName',
$options['repository']->getAuthorizedFormOfName(array('culture' =>
$language))));
}
else
{
@@ -403,7 +414,6 @@
protected static function getFallbackTitle($id, $options = array())
{
$params = array($id);
- $conn = Propel::getConnection();
// Get info objects (with offset)
$sql = 'SELECT i18n.title';
@@ -422,7 +432,7 @@
$sql .= ' AND node.source_culture = i18n.culture';
}
- $statement = $conn->prepare($sql);
+ $statement = self::$conn->prepare($sql);
$statement->execute($params);
$result = $statement->fetch(PDO::FETCH_OBJ);
Modified: trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php
==============================================================================
--- trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php Fri Feb
10 14:44:00 2012 (r10858)
+++ trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php Fri Feb
10 20:42:37 2012 (r10859)
@@ -64,6 +64,6 @@
$index = new $index;
$index->setLogger(new xfLoggerTask($this->dispatcher, $this->formatter));
$index->qubitPopulate($options);
- $index->optimize();
+ // $index->optimize();
}
}
--
You received this message because you are subscribed to the Google Groups
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/qubit-commits?hl=en.