Author: mj
Date: Tue Apr 3 11:46:30 2012
New Revision: 11352
Log:
Issue 2199: fix place/name term IDs on indexing. Issue 2287: complete I18n
mapping for IO PDO class. Issue 2288: more correct autocomplete behaviour, fix
name/place faceting.
Modified:
trunk/plugins/qtDominionPlugin/modules/search/actions/autocompleteAction.class.php
trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
Modified:
trunk/plugins/qtDominionPlugin/modules/search/actions/autocompleteAction.class.php
==============================================================================
---
trunk/plugins/qtDominionPlugin/modules/search/actions/autocompleteAction.class.php
Tue Apr 3 11:44:20 2012 (r11351)
+++
trunk/plugins/qtDominionPlugin/modules/search/actions/autocompleteAction.class.php
Tue Apr 3 11:46:30 2012 (r11352)
@@ -32,31 +32,47 @@
$query = new Elastica_Query();
$query->setLimit(3);
- // TODO: have to sort this on "recent" date
- $query->setFields(array('slug', 'i18n'));
- $query->setQuery(new Elastica_Query_Wildcard('i18n.title', $querystring .
'*'));
- $this->descriptions =
QubitSearch::getInstance()->index->getType('QubitInformationObject')->search($query);
- $this->descriptionsHits = $this->descriptions->getTotalHits();
+ // TODO: have to sort this on "recent" date (IOs only)
+ $queryString = new Elastica_Query_QueryString($querystring . '*');
+ $queryString->setDefaultOperator('AND');
+ $queryString->setAutoGeneratePhraseQueries(true);
+
+ // repositories
+ $queryString->setFields(array('actor.authorizedFormOfName'));
+ $query->setFields(array('slug', 'actor'));
+ $query->setQuery($queryString);
+ $this->repositories =
QubitSearch::getInstance()->index->getType('QubitRepository')->search($query);
+ $this->repositoriesHits = $this->repositories->getTotalHits();
+
+ // actors
+ $queryString->setFields(array('i18n.authorizedFormOfName'));
$query->setFields(array('slug', 'i18n'));
- $query->setQuery(new Elastica_Query_Wildcard('i18n.authorizedFormOfName',
$querystring . '*'));
+ $query->setQuery($queryString);
+
$this->actors =
QubitSearch::getInstance()->index->getType('QubitActor')->search($query);
$this->actorsHits = $this->actors->getTotalHits();
- $query->setFields(array('slug', 'actor'));
- $query->setQuery(new Elastica_Query_Wildcard('actor.authorizedFormOfName',
$querystring . '*'));
- $this->repositories =
QubitSearch::getInstance()->index->getType('QubitRepository')->search($query);
- $this->repositoriesHits = $this->repositories->getTotalHits();
+ // information objects
+ $queryString->setFields(array('i18n.title'));
+ $query->setFields(array('slug', 'i18n'));
+ $query->setQuery($queryString);
+
+ $this->descriptions =
QubitSearch::getInstance()->index->getType('QubitInformationObject')->search($query);
+ $this->descriptionsHits = $this->descriptions->getTotalHits();
+
+ // terms
+ $queryString->setFields(array('i18n.name'));
+ $query->setFields(array('slug', 'i18n', 'taxonomyId'));
+ $query->setQuery($queryString);
$filter = new Elastica_Filter_Term();
- $query->setFields(array('slug', 'i18n'));
- $query->setQuery(new Elastica_Query_Wildcard('i18n.name', $querystring .
'*'));
$this->subjects =
QubitSearch::getInstance()->index->getType('QubitTerm')->search($query->setFilter($filter->setTerm('taxonomyId',
QubitTaxonomy::SUBJECT_ID)));
$this->subjectsHits = $this->subjects->getTotalHits();
if (0 == $this->descriptionsHits && 0 == $this->actorsHits && 0 ==
$this->repositoriesHits && 0 == $this->subjectsHits)
{
- return sfView::NONE;
+// return sfView::NONE;
}
}
}
Modified:
trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
==============================================================================
--- trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
Tue Apr 3 11:44:20 2012 (r11351)
+++ trunk/plugins/qtDominionPlugin/modules/search/actions/indexAction.class.php
Tue Apr 3 11:46:30 2012 (r11352)
@@ -159,13 +159,13 @@
case 'subjects.id':
$criteria = new Criteria;
- $criteria->add(QubitObjectTermRelation::ID, array_keys($ids),
Criteria::IN);
+ $criteria->add(QubitTerm::ID, array_keys($ids), Criteria::IN);
- $subjectAPs = QubitObjectTermRelation::get($criteria);
+ $subjectAPs = QubitTerm::get($criteria);
foreach ($subjectAPs as $subjectAP)
{
- $subjectAPnames[$subjectAP->id] =
$subjectAP->term->getName(array('cultureFallback' => true, 'culture' =>
$this->context->user->getCulture()));
+ $subjectAPnames[$subjectAP->id] =
$subjectAP->getName(array('cultureFallback' => true, 'culture' =>
$this->context->user->getCulture()));
}
foreach ($facet['terms'] as &$term)
@@ -200,13 +200,13 @@
case 'places.id':
$criteria = new Criteria;
- $criteria->add(QubitObjectTermRelation::ID, array_keys($ids),
Criteria::IN);
+ $criteria->add(QubitTerm::ID, array_keys($ids), Criteria::IN);
- $placeAPs = QubitObjectTermRelation::get($criteria);
+ $placeAPs = QubitTerm::get($criteria);
foreach ($placeAPs as $placeAP)
{
- $placeAPnames[$placeAP->id] =
$placeAP->term->getName(array('cultureFallback' => true, 'culture' =>
$this->context->user->getCulture()));
+ $placeAPnames[$placeAP->id] =
$placeAP->getName(array('cultureFallback' => true, 'culture' =>
$this->context->user->getCulture()));
}
foreach ($facet['terms'] as &$term)
Modified:
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
==============================================================================
---
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
Tue Apr 3 11:44:20 2012 (r11351)
+++
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitInformationObjectMapping.class.php
Tue Apr 3 11:46:30 2012 (r11352)
@@ -131,10 +131,8 @@
{
$term = $subject->getTerm();
- $subjectI18ns = $term->termI18ns->indexBy('culture');
- $serializedI18ns = self::serializeI18ns(new QubitTerm(),
$subjectI18ns);
-
- $serialized['subjects'][] = array('id' => $subject->id, 'i18n' =>
$serializedI18ns);
+ $serializedI18ns = QubitTermMapping::serializeI18ns($term,
$term->termI18ns->indexBy('culture'));
+ $serialized['subjects'][] = array('id' => $term->id, 'i18n' =>
$serializedI18ns);
}
// Place access points (terms)
@@ -142,37 +140,28 @@
{
$term = $place->getTerm();
- $placeI18ns = $term->termI18ns->indexBy('culture');
- $serializedI18ns = self::serializeI18ns(new QubitTerm(), $placeI18ns);
-
- $serialized['places'][] = array('id' => $place->id, 'i18n' =>
$serializedI18ns);
-
+ $serializedI18ns = QubitTermMapping::serializeI18ns($term,
$term->termI18ns->indexBy('culture'));
+ $serialized['places'][] = array('id' => $term->id, 'i18n' =>
$serializedI18ns);
}
// Name access points (actors)
foreach ($object->getNameAccessPoints() as $name)
{
- $nameI18ns = $name->object->actorI18ns->indexBy('culture');
- $serializedI18ns = self::serializeI18ns(new QubitActor(), $nameI18ns);
-
+ $serializedI18ns = self::serializeI18ns(new QubitActor(),
$name->object->actorI18ns->indexBy('culture'));
$serialized['names'][] = array('id' => $name->object->id, 'i18n' =>
$serializedI18ns);
}
// Creators (actors)
foreach ($object->getCreators() as $creator)
{
- $creatorI18ns = $creator->actorI18ns->indexBy('culture');
- $serializedI18ns = self::serializeI18ns(new QubitActor(),
$creatorI18ns);
-
+ $serializedI18ns = self::serializeI18ns(new QubitActor(),
$creator->actorI18ns->indexBy('culture'));
$serialized['creators'][] = array('id' => $creator->id, 'i18n' =>
$serializedI18ns);
}
// Notes
foreach ($object->getNotes() as $note)
{
- $noteI18ns = $note->noteI18ns->indexBy('culture');
- $serializedI18ns = self::serializeI18ns(new QubitNote(), $noteI18ns);
-
+ $serializedI18ns = self::serializeI18ns(new QubitNote(),
$note->noteI18ns->indexBy('culture'));
$serialized['notes'][] = array('id' => $note->id, 'i18n' =>
$serializedI18ns);
}
Modified:
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
==============================================================================
---
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
Tue Apr 3 11:44:20 2012 (r11351)
+++
trunk/plugins/qtElasticSearchPlugin/lib/model/QubitPdoInformationObject.class.php
Tue Apr 3 11:46:30 2012 (r11352)
@@ -188,7 +188,7 @@
$sql = 'SELECT
node.id';
$sql .= ' FROM '.QubitInformationObject::TABLE_NAME.' node';
- $sql .= ' WHERE node.parent_id = ?';
+ $sql .= ' WHERE node.parent_id = :id';
$sql .= ' ORDER BY lft';
$this->children = QubitPdo::fetchAll($sql, array(':id' => $this->id));
@@ -197,11 +197,6 @@
return $this->children;
}
- public function hasChildren()
- {
- return !empty($this->children);
- }
-
/**
* Return the closest repository
*
@@ -244,7 +239,7 @@
}
}
-// TODO: FIX THESE METHODS
+// TODO: FIX/REMOVE THESE METHODS
protected function getFallbackTitle()
{
$sql = 'SELECT i18n.title';
@@ -329,6 +324,7 @@
if ('1' == sfConfig::get('app_inherit_code_informationobject', 1))
{
$refcode = '';
+ $this->getRepository();
if (isset($this->repository))
{
if (null != $cc = $this->repository->getCountryCode(array('culture' =>
$this->__get('source_culture'))))
@@ -343,6 +339,7 @@
}
$identifiers = array();
+ $this->getAncestors();
foreach (array_merge($this->ancestors, array($this)) as $item)
{
if (isset($item->identifier))
@@ -754,23 +751,19 @@
$serialized['levelOfDescriptionId'] = $this->level_of_description_id;
$serialized['publicationStatusId'] = $this->publication_status_id;
- // hierarchy information as arrays
- $this->getAncestors();
- $serialized['parentId'] =
$this->ancestors[count($this->ancestors)-1]->id;
-
// NB: this will include the ROOT_ID
- foreach ($this->ancestors as $ancestor)
+ foreach ($this->getAncestors() as $ancestor)
{
$serialized['ancestors'][] = $ancestor->id;
}
- if ($this->hasChildren())
+ // hierarchy information as arrays
+ $serialized['parentId'] =
$this->ancestors[count($this->ancestors)-1]->id;
+
+ // NB: this should be an ordered array
+ foreach ($this->getChildren() as $child)
{
- // NB: this should be an ordered array
- foreach ($this->getChildren() as $child)
- {
- $serialized['children'][] = $child->id;
- }
+ $serialized['children'][] = $child->id;
}
// embed digital object information
@@ -780,10 +773,14 @@
$serialized['digitalObject']['thumbnail_FullPath'] =
$this->getThumbnailPath();
}
- $serialized['dates'] = $this->getDates('array');
+ $dates = $this->getDates('array');
+ if (0 < count($dates))
+ {
+ $serialized['dates'] = $dates;
+ }
// Repository (actor)
- if ($repository = $this->getRepository(array('inherit' =>
empty($this->repositoryId))))
+ if ($repository = $this->getRepository())
{
$repoI18ns = $repository->actorI18ns->indexBy('culture');
$serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(),
$repoI18ns);
@@ -796,9 +793,7 @@
{
$term = QubitTerm::getById($subject->id);
- $subjectI18ns = $term->termI18ns->indexBy('culture');
- $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(),
$subjectI18ns);
-// FIXME: these IDs are wrong
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(),
$term->termI18ns->indexBy('culture'));
$serialized['subjects'][] = array('id' => $subject->id, 'i18n' =>
$serializedI18ns);
}
@@ -807,15 +802,12 @@
{
$term = QubitTerm::getById($place->id);
- $placeI18ns = $term->termI18ns->indexBy('culture');
- $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(),
$placeI18ns);
-// FIXME: these IDs are wrong
+ $serializedI18ns = QubitMapping::serializeI18ns(new QubitTerm(),
$term->termI18ns->indexBy('culture'));
$serialized['places'][] = array('id' => $place->id, 'i18n' =>
$serializedI18ns);
-
}
// Name access points (actors)
- // FIXME use QubitPdoActor class
+ // TODO use QubitPdoActor class?
foreach ($this->getNameAccessPoints() as $name)
{
$nameSerialized = $name->serialize();
@@ -827,36 +819,25 @@
}
// Creators (actors)
- // FIXME use QubitPdoActor class
+ // TODO use QubitPdoActor class?
foreach ($this->getCreators() as $creator)
{
+ $i18n = array();
+ if (!empty($creator['name'])) $i18n['authorizedFormOfName'] =
$creator['name'];
+ if (!empty($creator['history'])) $i18n['history'] =
$creator['history'];
+ if (!empty($creator['culture'])) $i18n['culture'] =
$creator['culture'];
-// $creatorI18ns = $creator->actorI18ns->indexBy('culture');
-// $serializedI18ns = QubitMapping::serializeI18ns(new QubitActor(),
$creatorI18ns);
-// $serialized['creators'][] = array('id' => $creator->id, 'i18n' =>
$serializedI18ns);
-
- // FIXME: obviously this doesn't handle I18n properly
- $serialized['creators'][] = array('id' => $creator['id'], 'i18n' =>
array(
- array('authorizedFormOfName' =>
$creator['name'],
- 'history' => $creator['history'],
- 'culture' => $creator['culture'])
- ));
+ $serialized['creators'][] = array('id' => $creator['id'], 'i18n' =>
array($i18n));
}
// Notes
- // FIXME: same as above
foreach ($this->getNotes() as $note)
{
-/*
- $noteI18ns = $note->noteI18ns->indexBy('culture');
- $serializedI18ns = QubitMapping::serializeI18ns(new QubitNote(),
$noteI18ns);
- $serialized['notes'][] = array('id' => $note->id, 'i18n' =>
$serializedI18ns);
-*/
- $serialized['notes'][] = array('id' => $note->id, 'i18n' => array(
- array('content' => $note->content),
- array('culture' => $note->culture)
- ));
+ $i18n = array();
+ if (!empty($note->content)) $i18n['content'] = $note->content;
+ if (!empty($note->culture)) $i18n['culture'] = $note->culture;
+ $serialized['notes'][] = array('id' => $note->id, 'i18n' =>
array($i18n));
}
// get all i18n-ized versions of this object
Modified:
trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
==============================================================================
--- trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
Tue Apr 3 11:44:20 2012 (r11351)
+++ trunk/plugins/qtElasticSearchPlugin/lib/qtElasticSearchPlugin.class.php
Tue Apr 3 11:46:30 2012 (r11352)
@@ -205,6 +205,7 @@
}
else
{
+ $skips = explode(',', $options['skip']);
$this->optimize();
}
@@ -215,46 +216,52 @@
$this->logger->log('Populating index...', 'qtElasticSearch');
$total = 0;
- // terms
- $criteria = new Criteria;
- $criteria->add(QubitTerm::ID, QubitTerm::ROOT_ID, Criteria::NOT_EQUAL);
- $criteria->add(QubitTerm::TAXONOMY_ID, array(QubitTaxonomy::SUBJECT_ID,
QubitTaxonomy::PLACE_ID), Criteria::IN);
+ // repositories
+ if (!in_array('repos', $skips))
+ {
+ self::$counter = 0;
+ $this->logger->log('Indexing Repositories...', 'qtElasticSearch');
- $terms = QubitTerm::get($criteria);
- $total = $total + count($terms);
+ $criteria = new Criteria;
+ $criteria->add(QubitRepository::ID, QubitRepository::ROOT_ID,
Criteria::NOT_EQUAL);
- foreach ($terms as $key => $term)
- {
- $this->save($term);
+ $repositories = QubitRepository::get($criteria);
+ $total = $total + count($repositories);
- if ($options['verbose'])
+ foreach ($repositories as $key => $repository)
{
- $this->logger->log('QubitTerm "'.$term->__toString().'" inserted
('.$this->timer->elapsed().'s) ('.($key+1).'/'.count($terms).')',
'qtElasticSearch');
+ $this->save($repository);
+
+ if ($options['verbose'])
+ {
+ $this->logger->log('QubitRepository "'.$repository->__toString().'"
inserted ('.$this->timer->elapsed().'s)
('.($key+1).'/'.count($repositories).')', 'qtElasticSearch');
+ }
}
}
- // repositories
- $criteria = new Criteria;
- $criteria->add(QubitRepository::ID, QubitRepository::ROOT_ID,
Criteria::NOT_EQUAL);
-
- $repositories = QubitRepository::get($criteria);
- $total = $total + count($repositories);
-
- foreach ($repositories as $key => $repository)
+ // terms
+ if (!in_array('terms', $skips))
{
- $this->save($repository);
-
- if ($options['verbose'])
- {
- $this->logger->log('QubitRepository "'.$repository->__toString().'"
inserted ('.$this->timer->elapsed().'s)
('.($key+1).'/'.count($repositories).')', 'qtElasticSearch');
- }
+ self::$counter = 0;
+ $this->logger->log('Indexing Terms...', 'qtElasticSearch');
+ $total = $total + $this->addTerms($options);
}
// information objects
- $total = $total + $this->populateInformationObjects($options);
+ if (!in_array('ios', $skips))
+ {
+ self::$counter = 0;
+ $this->logger->log('Indexing Information Objects...', 'qtElasticSearch');
+ $total = $total + $this->populateInformationObjects($options);
+ }
// actors
- $total = $total + $this->addActors($options);
+ if (!in_array('actors', $skips))
+ {
+ self::$counter = 0;
+ $this->logger->log('Indexing Actors...', 'qtElasticSearch');
+ $total = $total + $this->addActors($options);
+ }
// if there are still documents in the batch queue, send them
if ($this->batchMode && count($this->batchDocs) > 0)
@@ -330,11 +337,8 @@
// Loop through results, and add to search index
foreach (self::$statements['getChildren']->fetchAll(PDO::FETCH_OBJ) as
$item)
{
- // 9266 IOs in 12.3s (750 /s)
- $object = new QubitPdoInformationObject($item->id);
+ $object = new QubitPdoInformationObject($item->id, $options);
- // empty: 9266 IOs in 78.0s (119 /s) --> 58.8 w/o logging (158/s)
- // full: 9266 IOs in 209s (44/s)
$serialized = $object->serialize();
/*
if ($comp =
$this->array_compare($this->serialize(QubitInformationObject::getById($item->id)),
$serialized))
@@ -346,7 +350,6 @@
echo var_dump($comp[1]);
}
*/
- // 9266 IOs in 221s (42/s)
$document = new Elastica_Document($item->id, $serialized);
$document->setType('QubitInformationObject');
@@ -376,12 +379,48 @@
// Pass ancestors and repository down to descendants
$this->recursivelyAddInformationObjects($item->id, $totalRows, array(
'ancestors' => array_merge($object->getAncestors(), array($object)),
- 'repository' => $object->getRepository()));
+ 'repository' => $object->getRepository(),
+ 'verbose' => $options['verbose']));
}
}
}
+ public function addTerms($options = array())
+ {
+ if (!isset(self::$conn))
+ {
+ self::$conn = Propel::getConnection();
+ }
+
+ $sql = 'SELECT
+ term.id';
+ $sql .= ' FROM '.QubitTerm::TABLE_NAME.' term';
+ $sql .= ' WHERE term.taxonomy_id IN (:subject, :place)';
+ $sql .= ' AND term.id != '.QubitTerm::ROOT_ID;
+
+ $terms = QubitPdo::fetchAll($sql, array(':subject' =>
QubitTaxonomy::SUBJECT_ID,
+ ':place' =>
QubitTaxonomy::PLACE_ID));
+ $numRows = count($terms);
+
+ // Loop through results, and add to search index
+ foreach ($terms as $item)
+ {
+ $term = QubitTerm::getById($item->id);
+ $this->save($term);
+
+ // Log it
+ self::$counter++;
+
+ if ($options['verbose'])
+ {
+ $this->logger->log('QubitTerm "#'.$item->id.'" inserted
('.$this->timer->elapsed().'s) ('.self::$counter.'/'.$numRows.')',
'qtElasticSearch');
+ }
+ }
+
+ return $numRows;
+ }
+
public function addActors($options = array())
{
if (!isset(self::$conn))
@@ -404,13 +443,9 @@
// Loop through results, and add to search index
foreach ($actors as $item)
{
- // 12312 actors in 2.6s (4735 /s)
$object = new QubitPdoActor($item->id);
- // 12312 actors in 16.4s (750 /s)
$serialized = $object->serialize();
-
- // 12120 actors in 75.1s (161 /s)
/*
if ($comp =
$this->array_compare($this->serialize(QubitActor::getById($item->id)),
$serialized))
{
@@ -421,7 +456,6 @@
echo var_dump($comp[1]);
}
*/
- // 12121 actors in 22.3s (543 /s)
$document = new Elastica_Document($item->id, $serialized);
$document->setType('QubitActor');
--
You received this message because you are subscribed to the Google Groups
"Qubit Toolkit Commits" group.
To post to this group, send email to qubit-commits@googlegroups.com.
To unsubscribe from this group, send email to
qubit-commits+unsubscribe@googlegroups.com.
For more options, visit this group at
http://groups.google.com/group/qubit-commits?hl=en.