Author: mj
Date: Thu Mar 29 13:46:14 2012
New Revision: 11309
Log:
Issue 2199. Modify the ES plugin to use PDO methods for reading information
objects when running qubitPopulate (i.e. the way David's
QubitSearchInformationObject class works). Known issues: this does not yet
work correctly for full I18n, and the same approach must still be implemented
for actors. Update xfPopulateTask to accept a -v flag and hide per-object
output by default for better performance.
Added:
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
- copied, changed from r11306,
trunk/lib/QubitSearchInformationObject.class.php
Modified:
trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php
Modified:
trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
==============================================================================
--- trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
Thu Mar 29 11:38:46 2012 (r11308)
+++ trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
Thu Mar 29 13:46:14 2012 (r11309)
@@ -186,7 +186,7 @@
}
break;
- case 'dates.start':
+ case 'dates.startDate':
$facets[strtr($name, '.', '_')] = $facet['ranges'][0]; //
FIXME: is this the best way?
break;
@@ -279,8 +279,8 @@
$query->addFacet($facet);
}
- $facet = new Elastica_Facet_Range('dates.start');
- $facet->setField('dates.start');
+ $facet = new Elastica_Facet_Range('dates.startDate');
+ $facet->setField('dates.startDate');
$facet->addRange(null, null);
$query->addFacet($facet);
Modified:
trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php
==============================================================================
--- trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php
Thu Mar 29 11:38:46 2012 (r11308)
+++ trunk/plugins/qtDominionPlugin/modules/search/templates/_searchResults.php
Thu Mar 29 13:46:14 2012 (r11309)
@@ -102,7 +102,7 @@
<h2 class="desktoponly"><?php echo __('Creation Date'); ?></h2>
<div class="scrollable dates" id="dates">
- <input type="text" value="<?php echo
$pager->facets['dates_start']['min'] ?>" name="from" /> - <input type="text"
value="<?php echo $pager->facets['dates_start']['max'] ?>" name="to" />
+ <input type="text" value="<?php echo
$pager->facets['dates_startDate']['min'] ?>" name="from" /> - <input
type="text" value="<?php echo $pager->facets['dates_startDate']['max'] ?>"
name="to" />
</div>
</div><!-- /.section -->
@@ -245,7 +245,7 @@
</p>
<p>
- <?php echo Qubit::renderDateStartEnd(null,
$doc['dates'][0]['start'], $doc['dates'][0]['end']); ?>
+ <?php echo Qubit::renderDateStartEnd(null,
$doc['dates'][0]['startDate'], $doc['dates'][0]['endDate']); ?>
<?php if
(!empty($doc[$doc['sourceCulture']]['creator'][0]['name'])): ?>
<?php echo __('by %1%', // FIXME: ARRAY ENUMERATION
array('%1%' =>
$doc[$sf_user->getCulture()]['creator'][0]['name'] ?:
$doc[$doc['sourceCulture']]['creator'][0]['name'])); ?>
Modified:
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
==============================================================================
---
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
Thu Mar 29 11:38:46 2012 (r11308)
+++
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
Thu Mar 29 13:46:14 2012 (r11309)
@@ -105,8 +105,8 @@
// NB: this doesn't work correctly on 3-date ranges, eg. 1999-2000,
2001
preg_match("/^.*(?P<start>\d{4}).*(?P<end>\d{4}?).*/", $rendered,
$matches);
- $dateIndex['start'] = $matches['start'];
- $dateIndex['end'] = $matches['end'];
+ $dateIndex['startDate'] = $matches['start'];
+ $dateIndex['endDate'] = $matches['end'];
$dateIndex['typeId'] = $date->getType()->id;
if (isset($date->actor))
Copied and modified:
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
(from r11306, trunk/lib/QubitSearchInformationObject.class.php)
==============================================================================
--- trunk/lib/QubitSearchInformationObject.class.php Thu Mar 29 00:03:24
2012 (r11306, copy source)
+++
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
Thu Mar 29 13:46:14 2012 (r11309)
@@ -18,14 +18,13 @@
*/
/**
- * Manage information objects in search index
+ * Lightweight version of QubitInformationObject which uses PDO directly
instead of the Propel ORM
*
- * @package Qubit
- * @subpackage QubitSearch
- * @author David Juhasz <[email protected]>
+ * @package qtElasticSearchPlugin
+ * @author MJ Suhonos <[email protected]>
* @version SVN: $Id$
*/
-class QubitSearchInformationObject
+class QubitPdoInformationObject
{
public
$ancestors,
@@ -61,7 +60,7 @@
'extent_and_medium',
'class_name',
'collection_root_slug',
- 'culture',
+// 'culture',
'finding_aids',
'has_digital_object',
'identifier',
@@ -99,7 +98,7 @@
/**
* METHODS
*/
- public function __construct($id, $culture, $options = array())
+ public function __construct($id, $options = array())
{
if (isset($options['conn']))
{
@@ -111,7 +110,7 @@
self::$conn = Propel::getConnection();
}
- $this->loadData($id, $culture, $options);
+ $this->loadData($id, $options);
// Get inherited ancestors
if (isset($options['ancestors']))
@@ -122,11 +121,8 @@
// Get inherited repository, unless a repository is set at current level
if (isset($options['repository']) && !$this->__isset('repository_id'))
{
- $resource->repository = $options['repository'];
+ $this->repository = $options['repository'];
}
-
- $this->index = QubitSearch::getInstance()->getEngine()->getIndex();
- $this->doc = new Zend_Search_Lucene_Document;
}
public function __isset($name)
@@ -152,7 +148,7 @@
$this->data[$name] = $value;
}
- protected function loadData($id, $culture, $options = array())
+ protected function loadData($id)
{
if (!isset(self::$statements['informationObject']))
{
@@ -172,8 +168,8 @@
ON io.id = pubstat.object_id
LEFT JOIN '.QubitDigitalObject::TABLE_NAME.' do
ON io.id = do.information_object_id
- WHERE io.id = :id
- AND i18n.culture = :culture';
+ WHERE io.id = :id';
+// AND i18n.culture = :culture';
self::$statements['informationObject'] = self::$conn->prepare($sql);
}
@@ -181,7 +177,7 @@
// Do select
self::$statements['informationObject']->execute(array(
':id' => $id,
- ':culture' => $culture));
+));// ':culture' => $culture));
// Get first result
$this->data =
self::$statements['informationObject']->fetch(PDO::FETCH_ASSOC);
@@ -196,273 +192,6 @@
return $this;
}
- public function addToIndex()
- {
- // Pre-populate
- $this->getAncestors();
- $this->getRepository();
- $this->getLanguagesAndScripts();
-
- // Add fields
- foreach (self::$fields as $name)
- {
- $this->addField($name);
- }
-
- $this->addDocument();
- }
-
- public function addDocument()
- {
- $this->index->addDocument($this->doc);
- }
-
- protected function addField($name)
- {
- $camelName = lcfirst(sfInflector::camelize($name));
- $field = $value = null;
-
- switch ($name)
- {
- case 'class_name':
- $field = Zend_Search_Lucene_Field::Keyword($camelName,
'QubitInformationObject');
-
- break;
-
- case 'collection_root_slug':
- $field = Zend_Search_Lucene_Field::Keyword($camelName,
$this->getCollectionRoot()->slug);
-
- break;
-
- case 'creator':
- $names = array();
- foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID))
as $item)
- {
- if (isset($item->authorized_form_of_name))
- {
- $names[] = $item->authorized_form_of_name;
- }
- }
-
- // Add field
- $field = Zend_Search_Lucene_Field::Unstored($camelName, implode(' ',
$names));
- $field->boost = 8; // Boost the relevance
-
- break;
-
- case 'creator_history':
- $histories = array();
- foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID))
as $item)
- {
- if (isset($item->history))
- {
- $names[] = $item->history;
- }
- }
- $field = Zend_Search_Lucene_Field::Unstored($camelName, implode(' ',
$histories));
-
- break;
-
- // Serialized creator data for creating links in search results
- case 'creator_serialized':
- $creators = array();
- foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID))
as $item)
- {
- $creators[] = array(
- 'name' => $item->authorized_form_of_name,
- 'slug' => $item->slug
- );
- }
-
- $field = Zend_Search_Lucene_Field::UnIndexed($camelName,
serialize($creators));
-
- break;
-
- // Serialized date array for display in search results
- case 'date_serialized':
- $field = Zend_Search_Lucene_Field::UnIndexed($camelName,
serialize($this->getDates('array')));
-
- break;
-
- case 'has_digital_object':
- $field = Zend_Search_Lucene_Field::Keyword($camelName,
$this->__isset('digital_object_id') ? 'true' : 'false');
-
- break;
-
- case 'identifier':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->__get('identifier'));
- $field->boost = 5;
-
- break;
-
- case 'language':
- if (0 < count($this->languages))
- {
- $value = implode(' ', $this->languages);
- }
-
- $field = Zend_Search_Lucene_Field::Unstored($camelName, $value);
-
- break;
-
- case 'level_of_description':
- $field = Zend_Search_Lucene_Field::Text($camelName,
$this->getLevelOfDescription());
-
- break;
-
- case 'material_type_id':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->getMaterialTypeId());
-
- break;
-
- case 'media_type':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->getMediaTypeName());
-
- break;
-
- case 'thumbnail_path':
- $field = Zend_Search_Lucene_Field::UnIndexed($camelName,
$this->getThumbnailPath());
-
- break;
-
- case 'name':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->getNameAccessPoints());
- $field->boost = 3;
-
- break;
-
- case 'notes':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->getNotes());
-
- break;
-
- case 'parent':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->ancestors[count($this->ancestors)-1]->slug);
-
- break;
-
- case 'part_of':
- $field = Zend_Search_Lucene_Field::Text($camelName,
$this->getCollectionRoot()->getTitle(array('culture' =>
$this->__get('culture'))));
-
- break;
-
- case 'physical_storage':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->getStorageNames());
-
- break;
-
- case 'place':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->getPlaceAccessPoints());
- $field->boost = 3;
-
- break;
-
- case 'reference_code':
- $field = Zend_Search_Lucene_Field::Text($camelName,
$this->getReferenceCode());
-
- break;
-
- case 'repository':
- if (isset($this->repository))
- {
- $value = $this->repository->getAuthorizedFormOfName(array('culture'
=> $this->__get('culture'), 'fallback' => true));
- }
-
- $field = Zend_Search_Lucene_Field::Text($camelName, $value);
-
- break;
-
- case 'repository_id':
- if (isset($this->repository))
- {
- $value = $this->repository->id;
- }
-
- $field = Zend_Search_Lucene_Field::Keyword($camelName, $value);
-
- break;
-
- case 'repository_slug':
- if (isset($this->repository))
- {
- $value = $this->repository->slug;
- }
-
- $field = Zend_Search_Lucene_Field::Keyword($camelName, $value);
-
- break;
-
- case 'subject':
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->getSubjectAccessPoints());
- $field->boost = 5;
-
- break;
-
- case 'script':
- if (0 < count($this->scripts))
- {
- $value = implode(' ', $this->scripts);
- }
-
- $field = Zend_Search_Lucene_Field::Unstored($camelName, $value);
-
- break;
-
- case 'title':
- $value = $this->__get('title');
- if (0 == strlen($value))
- {
- // Include an i18n fallback for proper search result display in case
the
- // title field was not translated
- $value = $this->getFallbackTitle();
- }
-
- $field = Zend_Search_Lucene_Field::Text($camelName, $value);
- $field->boost = 10;
-
- break;
-
- // DATES
- case 'start_date':
- case 'end_date':
- case 'date':
- $this->doc->addField(Zend_Search_Lucene_Field::Unstored($camelName,
implode(' ', $this->getDates($name))));
-
- break;
-
- // TEXT fields
- case 'scope_and_content':
- $field = Zend_Search_Lucene_Field::Text($camelName,
$this->__get($name));
-
- break;
-
- // KEYWORD fields (internal ids, slugs, etc.)
- case 'culture':
- case 'id':
- case 'media_type_id':
- case 'publication_status_id':
- case 'slug':
- if ($this->__isset($name))
- {
- $field = Zend_Search_Lucene_Field::Keyword($camelName,
$this->__get($name));
- }
-
- break;
-
- // UNSTORED fields
- default:
- if ($this->__isset($name))
- {
- $field = Zend_Search_Lucene_Field::Unstored($camelName,
$this->__get($name));
- }
- }
-
- if (isset($field))
- {
- $this->doc->addField($field);
- }
- }
-
/**
* Return an array of ancestors
*
@@ -544,6 +273,26 @@
return QubitPdo::fetchOne($sql, array($this->__get('id')));
}
+ public function hasChildren()
+ {
+ }
+
+ public function getCreators()
+ {
+ $creators = array();
+
+ foreach ($this->getActors(array('typeId' => QubitTerm::CREATION_ID)) as
$item)
+ {
+ $creators[] = array(
+ 'id' => $item->id,
+ 'culture' => $item->culture,
+ 'name' => $item->authorized_form_of_name,
+ 'history' => $item->history
+ );
+ }
+ return $creators;
+ }
+
public function getLevelOfDescription()
{
if (!isset(self::$lookups['levelOfDescription']))
@@ -559,7 +308,7 @@
if
(isset(self::$lookups['levelOfDescription'][$this->__get('level_of_description_id')]))
{
return
self::$lookups['levelOfDescription'][$this->__get('level_of_description_id')]->getName(array(
- 'culture' => $this->__get('culture'),
+ 'culture' => $this->__get('culture'),
'fallback' => true));
}
}
@@ -584,7 +333,7 @@
if (isset(self::$lookups['mediaType'][$this->__get('media_type_id')]))
{
return
self::$lookups['mediaType'][$this->__get('media_type_id')]->getName(array(
- 'culture' => $this->__get('culture'),
+ 'culture' => $this->__get('culture'),
'fallback' => true));
}
}
@@ -645,6 +394,7 @@
act_slug.slug,
act_i18n.authorized_form_of_name,
act_i18n.history,
+ act_i18n.culture,
i18n.date';
$sql .= ' FROM '.QubitEvent::TABLE_NAME.' event';
$sql .= ' JOIN '.QubitEventI18n::TABLE_NAME.' i18n
@@ -653,22 +403,22 @@
ON event.actor_id = act_i18n.id';
$sql .= ' LEFT JOIN '.QubitSlug::TABLE_NAME.' act_slug
ON event.actor_id = act_slug.object_id';
- $sql .= ' WHERE event.information_object_id = ?
- AND i18n.culture = ?
- AND (act_i18n.id IS NULL OR act_i18n.culture = ?)';
+ $sql .= ' WHERE event.information_object_id = ?';
+// AND i18n.culture = ?
+// AND (act_i18n.id IS NULL OR act_i18n.culture = ?)';
self::$statements['event'] = self::$conn->prepare($sql);
}
self::$statements['event']->execute(array(
- $this->__get('id'),
- $this->__get('culture'),
- $this->__get('culture')));
+ $this->__get('id')));
+// $this->__get('culture'),
+// $this->__get('culture')));
return self::$statements['event']->fetchAll(PDO::FETCH_OBJ);
}
- protected function getDates($field)
+ public function getDates($field)
{
$dates = array();
@@ -699,11 +449,25 @@
case 'array':
if (isset($item->date) || isset($item->start_date) ||
isset($item->end_date))
{
+ $rendered = Qubit::renderDateStartEnd($item->date,
$item->start_date, $item->end_date);
+
+ // try to extract two 4-digit years
+ // NB: this doesn't work correctly on 3-date ranges, eg.
1999-2000, 2001
+ preg_match("/^.*(?P<start>\d{4}).*(?P<end>\d{4}?).*/",
$rendered, $matches);
+
+ $item->start_date = $matches['start'];
+ $item->end_date = $matches['end'];
+/*
+ if (isset($date->actor))
+ {
+ $dateIndex['actor'] = $date->actor->__toString();
+ }
+*/
$dates[] = array(
- 'date' => $item->date,
- 'start_date' => $item->start_date,
- 'end_date' => $item->end_date,
- 'type_id' => $item->type_id);
+// 'date' => $item->date,
+ 'startDate' => $item->start_date,
+ 'endDate' => $item->end_date,
+ 'typeId' => $item->type_id);
}
break;
@@ -732,6 +496,8 @@
$actor = new stdClass();
+ $actor->id = $item->actor_id;
+ $actor->culture = $item->culture;
$actor->authorized_form_of_name = $item->authorized_form_of_name;
$actor->slug = $item->slug;
$actor->history = $item->history;
@@ -1043,4 +809,135 @@
return implode(' ', $names);
}
}
-}
+
+ // Serialize yaself! Don' disrespec yaself
+ public function serialize()
+ {
+ $serialized = array();
+
+ $serialized['slug'] = $this->slug;
+ $serialized['referenceCode'] = $this->getReferenceCode();
+ $serialized['identifier'] = $this->identifier;
+
+ $serialized['levelOfDescriptionId'] = $this->level_of_description_id;
+ $serialized['publicationStatusId'] = $this->publication_status_id;
+
+ // hierarchy information as arrays
+ $this->getAncestors();
+ $serialized['parentId'] =
$this->ancestors[count($this->ancestors)-1]->id;
+
+ // NB: this will include the ROOT_ID
+ foreach ($this->ancestors as $ancestor)
+ {
+ $serialized['ancestors'][] = $ancestor->id;
+ }
+
+ if ($this->hasChildren())
+ {
+ // NB: this should be an ordered array
+ foreach ($this->getChildren() as $child)
+ {
+ $serialized['children'][] = $child->id;
+ }
+ }
+
+ // embed digital object information
+ if ($this->media_type_id)
+ {
+ $serialized['digitalObject']['mediaTypeId'] = $this->media_type_id;
+ $serialized['digitalObject']['thumbnail_FullPath'] =
$this->getThumbnailPath();
+ }
+
+ $serialized['dates'] = $this->getDates('array');
+
+ // Repository (actor)
+ if ($repository = $this->getRepository(array('inherit' =>
empty($this->repositoryId))))
+ {
+ $repoI18ns = $repository->actorI18ns->indexBy('culture');
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(),
$repoI18ns);
+
+ $serialized['repository'] = array('id' => $repository->id, 'i18n' =>
$serializedI18ns);
+ }
+
+ // Subject access points (terms)
+ foreach ($this->getSubjectAccessPoints() as $subject)
+ {
+ $term = $subject->getTerm();
+
+ $subjectI18ns = $term->termI18ns->indexBy('culture');
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(),
$subjectI18ns);
+
+ $serialized['subjects'][] = array('id' => $subject->id, 'i18n' =>
$serializedI18ns);
+ }
+
+ // Place access points (terms)
+ foreach ($this->getPlaceAccessPoints() as $place)
+ {
+ $term = $place->getTerm();
+
+ $placeI18ns = $term->termI18ns->indexBy('culture');
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(),
$placeI18ns);
+
+ $serialized['places'][] = array('id' => $place->id, 'i18n' =>
$serializedI18ns);
+
+ }
+
+ // Name access points (actors)
+ foreach ($this->getNameAccessPoints() as $name)
+ {
+ $nameI18ns = $name->object->actorI18ns->indexBy('culture');
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(),
$nameI18ns);
+
+ $serialized['names'][] = array('id' => $name->object->id, 'i18n' =>
$serializedI18ns);
+ }
+
+ // Creators (actors)
+ foreach ($this->getCreators() as $creator)
+ {
+/*
+ $creatorI18ns = $creator->actorI18ns->indexBy('culture');
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(),
$creatorI18ns);
+
+ $serialized['creators'][] = array('id' => $creator->id, 'i18n' =>
$serializedI18ns);
+*/
+ // FIXME: obviously this doesn't handle I18n properly
+ $serialized['creators'][] = array('id' => $creator['id'], 'i18n' =>
array(
+ array('authorizedFormOfName' =>
$creator['name'],
+ 'history' => $creator['history'],
+ 'culture' => $creator['culture'])
+ ));
+ }
+
+ // Notes
+ foreach ($this->getNotes() as $note)
+ {
+ $noteI18ns = $note->noteI18ns->indexBy('culture');
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitNote(),
$noteI18ns);
+
+ $serialized['notes'][] = array('id' => $note->id, 'i18n' =>
$serializedI18ns);
+ }
+
+ $serialized['sourceCulture'] = $this->source_culture;
+
+ foreach(QubitMapping::getI18nFields('QubitInformationObject') as
$camelName)
+ {
+ $fieldName = sfInflector::underscore($camelName);
+
+ if (!empty($this->data[$fieldName]))
+ {
+ $I18ns['culture'] = 'en'; // FIXME: OBVIOUSLY THIS IS A BAD HACK
+ $I18ns[lcfirst($camelName)] = $this->data[$fieldName];
+ }
+ }
+
+ $serialized['i18n'] = array($I18ns);
+//var_dump($serialized);
+//var_dump($this->data); exit;
+
+// $thisI18ns = $this->informationObjectI18ns->indexBy('culture');
+// $serialized['i18n'] = QubitMapping::serializeI18ns($this, $thisI18ns);
+
+ return $serialized;
+ }
+
+}
\ No newline at end of file
Modified:
trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
==============================================================================
--- trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
Thu Mar 29 11:38:46 2012 (r11308)
+++ trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
Thu Mar 29 13:46:14 2012 (r11309)
@@ -39,10 +39,12 @@
public $index = null;
- /*
- * Enable singleton creation via getInstance()
- */
- protected static $_instance;
+ // Enable singleton creation via getInstance()
+ protected static
+ $_instance,
+ $conn,
+ $statements,
+ $counter = 0;
public static function getInstance()
{
@@ -193,11 +195,8 @@
{
sfContext::createInstance(sfProjectConfiguration::getApplicationConfiguration('qubit',
'cli', true));
- $start = microtime(true);
- $this->logger->log('Populating index...', 'qtElasticSearch');
-
- // if we are using an offset to resume from a segfault, optimize the index
instead of deleting
- if (!isset($options['actorOffset']) && !isset($options['ioOffset']) &&
!isset($options['termOffset']))
+ // if we are skipping existing objects, optimize the index instead of
deleting
+ if (!isset($options['skip']))
{
$this->index->delete();
$this->initialize();
@@ -211,120 +210,65 @@
// set buffering and updates to be batched for better performance
$this->enableBatch();
- $termOffset = intval($options['termOffset']);
- $actorOffset = intval($options['actorOffset']);
- $ioOffset = intval($options['ioOffset']);
- $repoOffset = intval($options['repoOffset']);
+ $this->timer = new QubitTimer;
+ $this->logger->log('Populating index...', 'qtElasticSearch');
$total = 0;
- // index terms
- if (-1 < $termOffset)
- {
- $criteria = new Criteria;
- $criteria->add(QubitTerm::ID, QubitTerm::ROOT_ID, Criteria::NOT_EQUAL);
- $criteria->add(QubitTerm::TAXONOMY_ID, array(QubitTaxonomy::SUBJECT_ID,
QubitTaxonomy::PLACE_ID), Criteria::IN);
+ // terms
+ $criteria = new Criteria;
+ $criteria->add(QubitTerm::ID, QubitTerm::ROOT_ID, Criteria::NOT_EQUAL);
+ $criteria->add(QubitTerm::TAXONOMY_ID, array(QubitTaxonomy::SUBJECT_ID,
QubitTaxonomy::PLACE_ID), Criteria::IN);
- if (0 < $termOffset)
- {
- $criteria->setOffset($termOffset);
- $this->logger->log('Ignoring first '.$termOffset.' terms.',
'qtElasticSearch');
- }
+ $terms = QubitTerm::get($criteria);
+ $total = $total + count($terms);
- $terms = QubitTerm::get($criteria);
- $rowcount = count($terms) + $termOffset;
- $total = $total + $rowcount;
+ foreach ($terms as $key => $term)
+ {
+ $this->save($term);
- foreach ($terms as $key => $term)
+ if ($options['verbose'])
{
- $this->save($term);
- $this->logger->log('"'.$term->__toString().'" inserted
('.round(microtime(true) - $start, 2).'s) ('.($key + $termOffset +
1).'/'.$rowcount.')', 'qtElasticSearch');
+ $this->logger->log('"'.$term->__toString().'" inserted
('.$this->timer->elapsed().'s) ('.($key+1).'/'.count($terms).')',
'qtElasticSearch::QubitTerm');
}
}
- else
- {
- $this->logger->log('Terms are ignored.', 'qtElasticSearch');
- }
- // index actors
- if (-1 < $actorOffset)
- {
- $criteria = new Criteria;
- $criteria->add(QubitActor::ID, QubitActor::ROOT_ID, Criteria::NOT_EQUAL);
- $criteria = QubitActor::addGetOnlyActorsCriteria($criteria);
+ // repositories
+ $criteria = new Criteria;
+ $criteria->add(QubitRepository::ID, QubitRepository::ROOT_ID,
Criteria::NOT_EQUAL);
- if (0 < $actorOffset)
- {
- $criteria->setOffset($actorOffset);
- $this->logger->log('Ignoring first '.$actorOffset.' actors.',
'qtElasticSearch');
- }
+ $repositories = QubitRepository::get($criteria);
+ $total = $total + count($repositories);
- $actors = QubitActor::get($criteria);
- $rowcount = count($actors) + $actorOffset;
- $total = $total + $rowcount;
+ foreach ($repositories as $key => $repository)
+ {
+ $this->save($repository);
- foreach ($actors as $key => $actor)
+ if ($options['verbose'])
{
- $this->save($actor);
- $this->logger->log('"'.$actor->__toString().'" inserted
('.round(microtime(true) - $start, 2).'s) ('.($key + $actorOffset +
1).'/'.$rowcount.')', 'qtElasticSearch');
+ $this->logger->log('"'.$repository->__toString().'" inserted
('.$this->timer->elapsed().'s) ('.($key+1).'/'.count($repositories).')',
'qtElasticSearch::QubitRepository');
}
}
- else
- {
- $this->logger->log('Actors are ignored.', 'qtElasticSearch');
- }
-
- // index repositories
- if (-1 < $repoOffset)
- {
- $criteria = new Criteria;
- $criteria->add(QubitRepository::ID, QubitRepository::ROOT_ID,
Criteria::NOT_EQUAL);
- if (0 < $repoOffset)
- {
- $criteria->setOffset($repoOffset);
- $this->logger->log('Ignoring first '.$repoOffset.' repositories.',
'qtElasticSearch');
- }
+ // information objects
+ $total = $total + $this->populateInformationObjects($options);
- $repositories = QubitRepository::get($criteria);
- $rowcount = count($repositories) + $repoOffset;
- $total = $total + $rowcount;
+ // index actors
+ // FIXME: replicate IO PDO mechanism to index actors
+ $criteria = new Criteria;
+ $criteria->add(QubitActor::ID, QubitActor::ROOT_ID, Criteria::NOT_EQUAL);
+ $criteria = QubitActor::addGetOnlyActorsCriteria($criteria);
- foreach ($repositories as $key => $repository)
- {
- $this->save($repository);
- $this->logger->log('"'.$repository->__toString().'" inserted
('.round(microtime(true) - $start, 2).'s) ('.($key + $repoOffset +
1).'/'.$rowcount.')', 'qtElasticSearch');
- }
- }
- else
- {
- $this->logger->log('Repositories are ignored.', 'qtElasticSearch');
- }
+ $actors = QubitActor::get($criteria);
+ $total = $total + count($actors);
- // index information objects
- if (-1 < $ioOffset)
+ foreach ($actors as $key => $actor)
{
- $criteria = new Criteria;
- $criteria->add(QubitInformationObject::ID,
QubitInformationObject::ROOT_ID, Criteria::NOT_EQUAL);
+ $this->save($actor);
- if (0 < $ioOffset)
+ if ($options['verbose'])
{
- $criteria->setOffset($ioOffset);
- $this->logger->log('Ignoring first '.$ioOffset.' information
objects.', 'qtElasticSearch');
+ $this->logger->log('"'.$actor->__toString().'" inserted
('.$this->timer->elapsed().'s) ('.($key+1).'/'.count($actors).')',
'qtElasticSearch::QubitActor');
}
-
- $informationObjects = QubitInformationObject::get($criteria);
- $rowcount = count($informationObjects) + $ioOffset;
- $total = $total + $rowcount;
-
- foreach ($informationObjects as $key => $informationObject)
- {
- $this->save($informationObject);
- $this->logger->log('"'.$informationObject->__toString().'" inserted
('.round(microtime(true) - $start, 2).'s) ('.($key + $ioOffset +
1).'/'.$rowcount.')', 'qtElasticSearch');
- }
- }
- else
- {
- $this->logger->log('Information objects are ignored.',
'qtElasticSearch');
}
// if there are still documents in the batch queue, send them
@@ -335,7 +279,7 @@
$this->batchDocs = array();
}
- $this->logger->log('Index populated with "'.($total).'" documents in
"'.round(microtime(true) - $start, 2).'" seconds.', 'qtElasticSearch');
+ $this->logger->log('Index populated with "'.($total).'" documents in
"'.$this->timer->elapsed().'" seconds.', 'qtElasticSearch');
}
/*
@@ -356,4 +300,133 @@
}
}
+ /*
+ * PORTED FROM QUBITSEARCH CLASS
+ */
+
+ public function populateInformationObjects($options = array())
+ {
+ if (!isset(self::$conn))
+ {
+ self::$conn = Propel::getConnection();
+ }
+
+ // Get count of all information objects
+ $sql = 'SELECT COUNT(*)';
+ $sql .= ' FROM '.QubitInformationObject::TABLE_NAME;
+ $sql .= ' WHERE id > ?';
+
+ $totalRows = QubitPdo::fetchColumn($sql,
array(QubitInformationObject::ROOT_ID));
+
+ // Recursively descend down hierarchy
+ $this->recursivelyAddInformationObjects(QubitInformationObject::ROOT_ID,
$totalRows, $options);
+
+ return $totalRows;
+ }
+
+ public function recursivelyAddInformationObjects($parentId, $totalRows,
$options = array())
+ {
+ // Get information objects
+ if (!isset(self::$statements['getChildren']))
+ {
+ $sql = 'SELECT
+ io.id,
+ io.lft,
+ io.rgt,
+ i18n.culture,
+ i18n.title';
+ $sql .= ' FROM '.QubitInformationObject::TABLE_NAME.' io';
+ $sql .= ' JOIN '.QubitInformationObjectI18n::TABLE_NAME.' i18n
+ ON io.id = i18n.id';
+ $sql .= ' WHERE io.parent_id = ?';
+ $sql .= ' ORDER BY io.lft';
+
+ self::$statements['getChildren'] = self::$conn->prepare($sql);
+ }
+
+ self::$statements['getChildren']->execute(array($parentId));
+
+ // Loop through results, and add to search index
+ foreach (self::$statements['getChildren']->fetchAll(PDO::FETCH_OBJ) as
$item)
+ {
+ // 9266 IOs in 12.3s (750 /s)
+ $object = new QubitPdoInformationObject($item->id);
+
+ // empty: 9266 IOs in 78.0s (119 /s) --> 58.8 w/o logging (158/s)
+ // full: 9266 IOs in 209s (44/s)
+ $serialized = $object->serialize();
+/*
+ if
($this->array_compare($this->serialize(QubitInformationObject::getById($item->id)),
$serialized))
+ {
+ // WARNING: PDO object is not serialized correctly
+ }
+*/
+ // 9266 IOs in 221s (42/s)
+ $document = new Elastica_Document($object->id, $serialized);
+ $document->setType('QubitInformationObject');
+
+ // add this document to the batch queue
+ $this->batchDocs[] = $document;
+
+ // if we have a full batch, send in bulk
+ if (count($this->batchDocs) >= $this->batchSize)
+ {
+ $this->index->addDocuments($this->batchDocs);
+ $this->index->refresh();
+
+ $this->batchDocs = array();
+ }
+
+ // Log it
+ self::$counter++;
+
+ if ($options['verbose'])
+ {
+ $this->logger->log('"'.$item->title.'" inserted
('.$this->timer->elapsed().'s) ('.self::$counter.'/'.$totalRows.')',
'qtElasticSearch::QubitInformationObject');
+ }
+
+ // Descend hierarchy
+ if (1 < ($item->rgt - $item->lft))
+ {
+ // Pass ancestors and repository down to descendants
+ $this->recursivelyAddInformationObjects($item->id, $totalRows, array(
+ 'ancestors' => array_merge($object->getAncestors(), array($object)),
+ 'repository' => $object->getRepository()));
+ }
+
+ }
+ }
+
+ public function array_compare($array1, $array2) {
+ $diff = false;
+ // Left-to-right
+ foreach ($array1 as $key => $value) {
+ if (!array_key_exists($key,$array2)) {
+ $diff[0][$key] = $value;
+ } elseif (is_array($value)) {
+ if (!is_array($array2[$key])) {
+ $diff[0][$key] = $value;
+ $diff[1][$key] = $array2[$key];
+ } else {
+ $new = $this->array_compare($value, $array2[$key]);
+ if ($new !== false) {
+ if (isset($new[0])) $diff[0][$key] = $new[0];
+ if (isset($new[1])) $diff[1][$key] = $new[1];
+ };
+ };
+ } elseif ($array2[$key] !== $value) {
+ $diff[0][$key] = $value;
+ $diff[1][$key] = $array2[$key];
+ };
+ };
+ // Right-to-left
+ foreach ($array2 as $key => $value) {
+ if (!array_key_exists($key,$array1)) {
+ $diff[1][$key] = $value;
+ };
+ // No direct comparsion because matching keys were compared in the
+ // left-to-right loop earlier, recursively.
+ };
+ return $diff;
+ }
}
\ No newline at end of file
Modified: trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php
==============================================================================
--- trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php Thu Mar
29 11:38:46 2012 (r11308)
+++ trunk/plugins/sfSearchPlugin/lib/task/xfPopulateTask.class.php Thu Mar
29 13:46:14 2012 (r11309)
@@ -28,6 +28,7 @@
new sfCommandOption('application', null,
sfCommandOption::PARAMETER_OPTIONAL, 'The application name', 'qubit'),
new sfCommandOption('env', null, sfCommandOption::PARAMETER_REQUIRED,
'The environment', 'cli'),
// new sfCommandOption('optimize', 'o', sfCommandOption::PARAMETER_NONE,
'If passed, the index is optimized after population'),
+ new sfCommandOption('verbose', 'v', sfCommandOption::PARAMETER_NONE, 'If
passed, progress is displayed for each object indexed'),
new sfCommandOption('skip', 'k', sfCommandOption::PARAMETER_REQUIRED,
'Only skip "actors" or "io" (information objects)')));
$this->namespace = 'search';
--
You received this message because you are subscribed to the Google Groups
"Qubit Toolkit Commits" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/qubit-commits?hl=en.