Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/84761
Change subject: Highlighting and search operator fixes. ...................................................................... Highlighting and search operator fixes. We shouldn't be indexing HTML in the headings, so I've stripped it. Highlighting should be sorted by score, and we need to fragment the array fields or else we get back all of the ones that match and just pick the first. Fragmenting them lets them get sorted by score even if they are small. The default search operator needs to be AND. It was AND, but this was lost in the great Gerrit catastrophe of September 5, 2013. Bug: 53955 Change-Id: I5aaf621002bef78db15c44d71a19aa84759df7f2 --- M CirrusSearchSearcher.php M CirrusSearchUpdater.php 2 files changed, 19 insertions(+), 6 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/61/84761/1 diff --git a/CirrusSearchSearcher.php b/CirrusSearchSearcher.php index 64bdcae..16f3919 100644 --- a/CirrusSearchSearcher.php +++ b/CirrusSearchSearcher.php @@ -151,6 +151,7 @@ $this->query->setFields( $fields ); $this->query->setAutoGeneratePhraseQueries( true ); $this->query->setPhraseSlop( 3 ); + $this->query->setDefaultOperator( 'AND' ); // TODO phrase match boosts? $this->suggest = array( 'text' => $term, @@ -411,15 +412,24 @@ public function getFields() { return array( 'id', 'title', 'namespace', 'redirect' ); } + /** + * Setup highlighting. + * Don't fragment title because it is small. + * Get just one fragment from the text because that is all we will display. + * Get one fragment from redirect title and heading each or else they + * won't be sorted by score. 
+ * @return array of highlighting configuration + */ public function getHighlightingConfiguration() { return array( + 'order' => 'score', 'pre_tags' => array( CirrusSearchSearcher::HIGHLIGHT_PRE ), 'post_tags' => array( CirrusSearchSearcher::HIGHLIGHT_POST ), 'fields' => array( - 'title' => array( 'number_of_fragments' => 0 ), // Don't fragment the title - it is too small. + 'title' => array( 'number_of_fragments' => 0 ), 'text' => array( 'number_of_fragments' => 1 ), - 'redirect.title' => array( 'number_of_fragments' => 0 ), // The redirect field is just like the title field. - 'heading' => array( 'number_of_fragments' => 0), // Too small to fragment + 'redirect.title' => array( 'number_of_fragments' => 1 ), + 'heading' => array( 'number_of_fragments' => 1), ), ); } @@ -549,7 +559,7 @@ $this->textSnippet = implode( "\n", array_slice( explode( "\n", $text ), 0, $contextLines ) ); } if ( isset( $highlights[ 'heading' ] ) ) { - $this->sectionSnippet = $highlights[ 'heading' ][ 0 ]; + $this->sectionSnippet = self::escapeHighlightedText( $highlights[ 'heading' ][ 0 ] ); $this->sectionTitle = $this->findSectionTitle(); } else { $this->sectionSnippet = ''; diff --git a/CirrusSearchUpdater.php b/CirrusSearchUpdater.php index b26fce9..6d22e60 100644 --- a/CirrusSearchUpdater.php +++ b/CirrusSearchUpdater.php @@ -188,10 +188,13 @@ $headings = array(); $ignoredHeadings = self::getIgnoredHeadings(); foreach ( $parserOutput->getSections() as $heading ) { + $heading = $heading[ 'line' ]; + // Strip tags from the heading or else we'll display them (escaped) in search results + $heading = Sanitizer::stripAllTags( $heading ); // Note that we don't take the level of the heading into account - all headings are equal. // Except the ones we ignore. 
- if ( !in_array( $heading[ 'line' ], $ignoredHeadings ) ) { - $headings[] = $heading[ 'line' ]; + if ( !in_array( $heading, $ignoredHeadings ) ) { + $headings[] = $heading; } } -- To view, visit https://gerrit.wikimedia.org/r/84761 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I5aaf621002bef78db15c44d71a19aa84759df7f2 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Manybubbles <never...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits