http://www.mediawiki.org/wiki/Special:Code/MediaWiki/73342
Revision: 73342
Author: svemir
Date: 2010-09-19 21:24:13 +0000 (Sun, 19 Sep 2010)
Log Message:
-----------
initial support for new model: SphinxMWSearch (extends SearchEngine and uses
standard MW search interface) - old model is still supported (at least until
version 0.8)
Modified Paths:
--------------
trunk/extensions/SphinxSearch/SphinxSearch.php
Added Paths:
-----------
trunk/extensions/SphinxSearch/SphinxMWSearch.php
Added: trunk/extensions/SphinxSearch/SphinxMWSearch.php
===================================================================
--- trunk/extensions/SphinxSearch/SphinxMWSearch.php
(rev 0)
+++ trunk/extensions/SphinxSearch/SphinxMWSearch.php 2010-09-19 21:24:13 UTC
(rev 73342)
@@ -0,0 +1,340 @@
+<?php
+
+/**
+ * Class file for the SphinxMWSearch extension
+ *
+ * http://www.mediawiki.org/wiki/Extension:SphinxSearch
+ *
+ * Released under GNU General Public License (see http://www.fsf.org/licenses/gpl.html)
+ *
+ * @file
+ * @ingroup Extensions
+ * @author Svemir Brkic <[email protected]>
+ */
+
+/**
+ * Search engine that sends full text queries to Sphinx through a
+ * SphinxClient and wraps the matches in a SphinxMWSearchResultSet.
+ */
+class SphinxMWSearch extends SearchEngine {
+
+	/** Values for an inclusive 'category' filter; hook handlers may change them */
+	var $categories = array();
+	/** Values for an exclusive 'category' filter; hook handlers may change them */
+	var $exc_categories = array();
+	/** Database handle handed to the result set for page lookups */
+	var $db;
+	/** SphinxClient created on the first call to searchText(), then reused */
+	var $sphinx_client = null;
+
+	/**
+	 * @param $db Database handle stored for later page lookups
+	 */
+	function __construct( $db ) {
+		$this->db = $db;
+	}
+
+	/**
+	 * Perform a full text search query and return a result set.
+	 *
+	 * Unbalanced parentheses, brackets and double quotes are escaped before
+	 * the term is sent to Sphinx; already escaped ones are left alone.
+	 *
+	 * @param string $term - Raw search term
+	 * @return SphinxMWSearchResultSet, or null if the term is blank or the
+	 *   Sphinx query fails
+	 * @access public
+	 */
+	function searchText( $term ) {
+		global $wgSphinxSearch_index_list;
+
+		if ( !$this->sphinx_client ) {
+			$this->sphinx_client = $this->prepareSphinxClient( $term );
+		}
+
+		// prepareSphinxClient() returns false for blank terms
+		if ( !$this->sphinx_client ) {
+			return null;
+		}
+
+		$this->searchTerms = $term;
+		// the slash is always escaped; more characters are added below
+		$escape = '/';
+		// opening symbol => closing symbol ('' when the same symbol closes)
+		$delims = array(
+			'(' => ')',
+			'[' => ']',
+			'"' => '',
+		);
+		// temporarily replace already escaped characters so that they are
+		// not counted as delimiters
+		$placeholders = array(
+			'\\(' => '_PLC_O_PAR_',
+			'\\)' => '_PLC_C_PAR_',
+			'\\[' => '_PLC_O_BRA_',
+			'\\]' => '_PLC_C_BRA_',
+			'\\"' => '_PLC_QUOTE_',
+		);
+		$term = str_replace( array_keys( $placeholders ), $placeholders, $term );
+		foreach ( $delims as $open => $close ) {
+			$open_cnt = substr_count( $term, $open );
+			if ( $close ) {
+				// if counts do not match, escape them all
+				$close_cnt = substr_count( $term, $close );
+				if ( $open_cnt != $close_cnt ) {
+					$escape .= $open . $close;
+				}
+			} elseif ( $open_cnt % 2 == 1 ) {
+				// if there is no closing symbol, count should be even
+				$escape .= $open;
+			}
+		}
+		// put the user's own escapes back
+		$term = str_replace( $placeholders, array_keys( $placeholders ), $term );
+
+		$resultSet = $this->sphinx_client->Query(
+			addcslashes( $term, $escape ),
+			$wgSphinxSearch_index_list
+		);
+		if ( $resultSet === false ) {
+			return null;
+		}
+
+		return new SphinxMWSearchResultSet(
+			$resultSet,
+			$term,
+			$this->sphinx_client,
+			$this->db
+		);
+	}
+
+	/**
+	 * We do a weighted title/body search, no need to return titles separately
+	 *
+	 * @param string $term - Raw search term
+	 * @return null always
+	 * @access public
+	 */
+	function searchTitle( $term ) {
+		return null;
+	}
+
+	/**
+	 * Create a SphinxClient and configure it from the $wgSphinxSearch_*
+	 * globals and the current search options of this engine.
+	 *
+	 * @param string $term - Raw search term, passed by reference to the
+	 *   SphinxSearchBeforeResults and SphinxSearchBeforeQuery hooks
+	 * @return SphinxClient: ready to run or false if term is empty
+	 */
+	function prepareSphinxClient( &$term ) {
+		global $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby,
+			$wgSphinxSearch_host, $wgSphinxSearch_port,
+			$wgSphinxSearch_index_weights, $wgSphinxSearch_mode,
+			$wgSphinxSearch_maxmatches, $wgSphinxSearch_cutoff,
+			$wgSphinxSearch_weights;
+
+		// don't do anything for blank searches
+		if ( trim( $term ) === '' ) {
+			return false;
+		}
+
+		wfRunHooks( 'SphinxSearchBeforeResults', array(
+			&$term,
+			&$this->offset,
+			&$this->namespaces,
+			&$this->categories,
+			&$this->exc_categories
+		) );
+
+		$cl = new SphinxClient();
+
+		// setup the options for searching
+		if ( isset( $wgSphinxSearch_host ) && isset( $wgSphinxSearch_port ) ) {
+			$cl->SetServer( $wgSphinxSearch_host, $wgSphinxSearch_port );
+		}
+		// is_array() guards keep count() away from unset or scalar settings
+		if ( is_array( $wgSphinxSearch_weights ) && count( $wgSphinxSearch_weights ) ) {
+			$cl->SetFieldWeights( $wgSphinxSearch_weights );
+		}
+		if ( is_array( $wgSphinxSearch_index_weights ) ) {
+			$cl->SetIndexWeights( $wgSphinxSearch_index_weights );
+		}
+		if ( isset( $wgSphinxSearch_mode ) ) {
+			$cl->SetMatchMode( $wgSphinxSearch_mode );
+		}
+		if ( is_array( $this->namespaces ) && count( $this->namespaces ) ) {
+			$cl->SetFilter( 'page_namespace', $this->namespaces );
+		}
+		if ( !$this->showRedirects ) {
+			$cl->SetFilter( 'page_is_redirect', array( 0 ) );
+		}
+		if ( is_array( $this->categories ) && count( $this->categories ) ) {
+			$cl->SetFilter( 'category', $this->categories );
+		}
+		if ( is_array( $this->exc_categories ) && count( $this->exc_categories ) ) {
+			// third argument turns this into an exclusion filter
+			$cl->SetFilter( 'category', $this->exc_categories, true );
+		}
+		$cl->SetSortMode( $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby );
+		$cl->SetLimits(
+			$this->offset,
+			$this->limit,
+			$wgSphinxSearch_maxmatches,
+			$wgSphinxSearch_cutoff
+		);
+
+		wfRunHooks( 'SphinxSearchBeforeQuery', array( &$term, &$cl ) );
+
+		return $cl;
+	}
+
+	/**
+	 * @return Boolean: can we list/unlist redirects
+	 */
+	function acceptListRedirects() {
+		return true;
+	}
+
+	/**
+	 * @return String: allowed query characters
+	 */
+	public static function legalSearchChars() {
+		return "A-Za-z_'./\"!~0-9\\x80-\\xFF\\-";
+	}
+
+}
+
+/**
+ * @ingroup Search
+ */
+class SphinxMWSearchResultSet extends SearchResultSet {
+	/** Index of the row that next() will return */
+	var $mNdx = 0;
+	/** SphinxClient passed on to each SphinxMWSearchResult */
+	var $sphinx_client = null;
+	/** Suggested page title found by hasSuggestion(), '' if none */
+	var $mSuggestion = '';
+
+	/**
+	 * Load the page rows for the documents matched by Sphinx.
+	 *
+	 * @param $resultSet Array: result of SphinxClient::Query(); the keys of
+	 *   its 'matches' entry are used as page ids
+	 * @param $terms String: search term, split into words for termMatches()
+	 * @param $sphinx_client SphinxClient that ran the query
+	 * @param $dbr Database handle used to read the page table
+	 */
+	function __construct( $resultSet, $terms, $sphinx_client, $dbr ) {
+		$this->sphinx_client = $sphinx_client;
+		$this->mResultSet = array();
+
+		// 'matches' may be missing when nothing was found
+		if ( is_array( $resultSet )
+			&& isset( $resultSet['matches'] )
+			&& is_array( $resultSet['matches'] )
+			&& count( $resultSet['matches'] )
+		) {
+			$ids = array_keys( $resultSet['matches'] );
+			// fetch all pages with one query instead of one query per match
+			$res = $dbr->select(
+				'page',
+				array( 'page_id', 'page_title', 'page_namespace' ),
+				array( 'page_id' => $ids ),
+				__METHOD__,
+				array()
+			);
+			$rows = array();
+			if ( $res ) {
+				while ( $row = $dbr->fetchObject( $res ) ) {
+					$rows[$row->page_id] = $row;
+				}
+			}
+			// keep the order in which Sphinx returned the matches
+			foreach ( $ids as $id ) {
+				if ( isset( $rows[$id] ) ) {
+					$this->mResultSet[] = $rows[$id];
+				}
+			}
+		}
+		$this->mNdx = 0;
+		// PREG_SPLIT_NO_EMPTY: leading or trailing punctuation must not
+		// produce empty terms
+		// NOTE(review): \W without the u modifier also splits on non-ASCII
+		// bytes - confirm whether non-Latin terms need to survive here
+		$this->mTerms = preg_split( '/\W+/', $terms, -1, PREG_SPLIT_NO_EMPTY );
+	}
+
+	/**
+	 * Some search modes return a suggested alternate term if there are
+	 * no exact hits. Returns true if there is one on this set.
+	 *
+	 * Only active when $wgSphinxSuggestMode is set; looks for a page title
+	 * that sounds like the first search term.
+	 *
+	 * @return Boolean
+	 */
+	function hasSuggestion() {
+		global $wgSphinxSuggestMode;
+
+		if ( !$wgSphinxSuggestMode || !isset( $this->mTerms[0] ) ) {
+			return false;
+		}
+
+		// Initial (weak) implementation - will be replaced
+		$dbr = wfGetDB( DB_SLAVE );
+		$res = $dbr->select(
+			array( 'page' ),
+			array( 'page_title' ),
+			array( "page_title SOUNDS LIKE " . $dbr->addQuotes( $this->mTerms[0] ) ),
+			__METHOD__,
+			array(
+				'ORDER BY' => 'page_counter desc',
+				'LIMIT' => 1
+			)
+		);
+		$suggestion = $res ? $dbr->fetchObject( $res ) : false;
+		if ( !$suggestion ) {
+			// no similar title; do not read a property of false
+			$this->mSuggestion = '';
+			return false;
+		}
+		$this->mSuggestion = (string)$suggestion->page_title;
+		return $this->mSuggestion !== '';
+	}
+
+	/**
+	 * @return String: suggested query, '' if none
+	 */
+	function getSuggestionQuery() {
+		return $this->mSuggestion;
+	}
+
+	/**
+	 * @return String: HTML-escaped suggested query, '' if none
+	 */
+	function getSuggestionSnippet() {
+		return htmlspecialchars( $this->mSuggestion );
+	}
+
+	/**
+	 * @return Array: search terms
+	 */
+	function termMatches() {
+		return $this->mTerms;
+	}
+
+	/**
+	 * @return Integer: number of results
+	 */
+	function numRows() {
+		return count( $this->mResultSet );
+	}
+
+	/**
+	 * @return SphinxMWSearchResult: next result, false if none
+	 */
+	function next() {
+		if ( !isset( $this->mResultSet[$this->mNdx] ) ) {
+			return false;
+		}
+		$row = $this->mResultSet[$this->mNdx];
+		++$this->mNdx;
+		return new SphinxMWSearchResult( $row, $this->sphinx_client );
+	}
+
+	/**
+	 * Release the loaded rows. The property is emptied rather than unset so
+	 * that numRows() and next() keep working afterwards.
+	 */
+	function free() {
+		$this->mResultSet = array();
+	}
+
+}
+
+/**
+ * Single search result that can build its text snippet with Sphinx.
+ */
+class SphinxMWSearchResult extends SearchResult {
+
+	/** SphinxClient used for BuildExcerpts() */
+	var $sphinx_client = null;
+
+	/**
+	 * @param $row Object: page row, passed to the parent constructor
+	 * @param $sphinx_client SphinxClient used to build excerpts
+	 */
+	function __construct( $row, $sphinx_client ) {
+		$this->sphinx_client = $sphinx_client;
+		parent::__construct( $row );
+	}
+
+	/**
+	 * Build the snippet with SphinxClient::BuildExcerpts(), or with the
+	 * parent implementation when $wgSphinxSearchMWHighlighter is set.
+	 *
+	 * Page text is escaped before it is returned; only the highlight spans
+	 * and the wrapping div are emitted as HTML.
+	 *
+	 * @param $terms Array: terms to highlight
+	 * @return String: highlighted text snippet, or the sphinxSearchWarning
+	 *   message if Sphinx could not build excerpts
+	 */
+	function getTextSnippet( $terms ) {
+		global $wgUser, $wgSphinxSearchMWHighlighter, $wgSphinxSearch_index;
+
+		if ( $wgSphinxSearchMWHighlighter ) {
+			return parent::getTextSnippet( $terms );
+		}
+
+		$this->initText();
+
+		list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
+
+		// Plain-text markers are put around matches instead of HTML, so the
+		// whole excerpt can be escaped before the real tags are inserted.
+		// They contain no character touched by the markup clean-up below.
+		$matchOpen = '_SPHINXMATCHOPEN_';
+		$matchClose = '_SPHINXMATCHCLOSE_';
+
+		$excerpts_opt = array(
+			"before_match" => $matchOpen,
+			"after_match" => $matchClose,
+			"chunk_separator" => " ... ",
+			"limit" => $contextlines * $contextchars,
+			"around" => $contextchars
+		);
+
+		$excerpts = $this->sphinx_client->BuildExcerpts(
+			array( $this->mText ),
+			$wgSphinxSearch_index,
+			join( ' ', $terms ),
+			$excerpts_opt
+		);
+
+		if ( !is_array( $excerpts ) ) {
+			return wfMsg( 'sphinxSearchWarning', $this->sphinx_client->GetLastError() );
+		}
+
+		$ret = '';
+		foreach ( $excerpts as $entry ) {
+			// remove HTML tags and some wiki markup
+			$entry = preg_replace(
+				'/([\[\]\{\}\*\#\|\!]+|==+)/',
+				' ',
+				strip_tags( $entry )
+			);
+			// the page text is user content: escape what is left of it
+			$entry = htmlspecialchars( $entry );
+			$entry = str_replace(
+				array( $matchOpen, $matchClose ),
+				array( "<span class='searchmatch'>", "</span>" ),
+				$entry
+			);
+			$ret .= "<div style='margin: 0.2em 1em 0.2em 1em;'>$entry</div>\n";
+		}
+		return $ret;
+	}
+
+}
\ No newline at end of file
Modified: trunk/extensions/SphinxSearch/SphinxSearch.php
===================================================================
--- trunk/extensions/SphinxSearch/SphinxSearch.php 2010-09-19 21:22:13 UTC
(rev 73341)
+++ trunk/extensions/SphinxSearch/SphinxSearch.php 2010-09-19 21:24:13 UTC
(rev 73342)
@@ -11,7 +11,7 @@
$wgExtensionCredits['specialpage'][] = array(
'path' => __FILE__,
- 'version' => '0.7.1',
+ 'version' => '0.7.2',
'name' => 'SphinxSearch',
'author' => array( 'Svemir Brkic', 'Paul Grinberg' ),
'email' => 'svemir at deveblog dot com, gri6507 at yahoo dot
com',
@@ -21,29 +21,35 @@
$dir = dirname( __FILE__ ) . '/';
-$wgAutoloadClasses['SphinxSearch'] = $dir . 'SphinxSearch_body.php';
$wgExtensionMessagesFiles['SphinxSearch'] = $dir . 'SphinxSearch.i18n.php';
-$wgExtensionAliasesFiles['SphinxSearch'] = $dir . 'SphinxSearch.alias.php';
# To completely disable the default search and replace it with SphinxSearch,
# set this BEFORE including SphinxSearch.php in LocalSettings.php
# $wgSearchType = 'SphinxSearch';
-
-if ( $wgSearchType == 'SphinxSearch' ) {
- $wgDisableInternalSearch = true;
- $wgDisableSearchUpdate = true;
- $wgSpecialPages['Search'] = 'SphinxSearch';
+# To use the new approach (added in 0.7.2) set it to SphinxMWSearch
+if ( $wgSearchType == 'SphinxMWSearch' ) {
+ $wgAutoloadClasses['SphinxMWSearch'] = $dir . 'SphinxMWSearch.php';
} else {
- $wgSpecialPages['SphinxSearch'] = 'SphinxSearch';
+	# Old model: the SphinxSearch class is needed in both cases below,
+	# either as the replacement for Special:Search or as its own special page
+	$wgAutoloadClasses['SphinxSearch'] = $dir . 'SphinxSearch_body.php';
+	if ( $wgSearchType == 'SphinxSearch' ) {
+		$wgDisableInternalSearch = true;
+		$wgDisableSearchUpdate = true;
+		$wgSpecialPages['Search'] = 'SphinxSearch';
+	} else {
+		$wgExtensionAliasesFiles['SphinxSearch'] = $dir . 'SphinxSearch.alias.php';
+		$wgSpecialPages['SphinxSearch'] = 'SphinxSearch';
+	}
}
# this assumes you have copied sphinxapi.php from your Sphinx
# installation folder to your SphinxSearch extension folder
+# not needed if you install http://pecl.php.net/package/sphinx
if ( !class_exists( 'SphinxClient' ) ) {
require_once ( $dir . "sphinxapi.php" );
}
-# Host and port on which searchd deamon is tunning
+# Host and port on which searchd daemon is running
$wgSphinxSearch_host = 'localhost';
$wgSphinxSearch_port = 9312;
@@ -70,44 +76,43 @@
$wgSphinxSearch_sortmode = SPH_SORT_RELEVANCE;
$wgSphinxSearch_sortby = '';
-# By default, search will return articles that match any of the words in the
search
-# To change that to require all words to match by default, set the following
to true
-$wgSphinxMatchAll = false;
+if ( $wgSearchType == 'SphinxMWSearch' ) {
+ # Following settings apply only in the new search model
-# Number of matches to display at once
-$wgSphinxSearch_matches = 10;
-# How many matches searchd will keep in RAM while searching
-$wgSphinxSearch_maxmatches = 1000;
-# When to stop searching all together (if not zero)
-$wgSphinxSearch_cutoff = 0;
+ # Set to true to use MW's default search snippets and highlighting
+ $wgSphinxSearchMWHighlighter = false;
+} else {
+ # Following settings apply only in the old search model
-# Weights of individual indexed columns. This gives page titles extra weight
-$wgSphinxSearch_weights = array(
- 'old_text' => 1,
- 'page_title' => 100
-);
+ # By default, search will return articles that match any of the words
in the search
+ # To change that to require all words to match by default, set the
following to true
+ $wgSphinxMatchAll = false;
+
+ # Number of matches to display at once
+ $wgSphinxSearch_matches = 10;
-# To enable hierarchical category search, specify the top category of your
hierarchy
-$wgSphinxTopSearchableCategory = '';
+ # To enable hierarchical category search, specify the top category of
your hierarchy
+ $wgSphinxTopSearchableCategory = '';
+
+ # This will fetch sub-categories as parent categories are checked
+ # Requires $wgUseAjax to be true
+ $wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren';
+
+ # Allow excluding selected categories when filtering
+ $wgUseExcludes = false;
-# This will fetch sub-categories as parent categories are checked
-# Requires $wgUseAjax to be true
-$wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren';
+ # Web-accessible path to the extension's folder
+ $wgSphinxSearchExtPath = $wgScriptPath . '/extensions/SphinxSearch';
+
+ # Web-accessible path to the folder with SphinxSearch.js file (if
different from $wgSphinxSearchExtPath)
+ $wgSphinxSearchJSPath = '';
+}
-# EXPERIMENTAL: allow excluding selected categories when filtering
-$wgUseExcludes = false;
-
-# Web-accessible path to the extension's folder
-$wgSphinxSearchExtPath = $wgScriptPath . '/extensions/SphinxSearch';
-
-# Web-accessible path to the folder with SphinxSearch.js file (if different
from $wgSphinxSearchExtPath)
-$wgSphinxSearchJSPath = '';
-
# #########################################################
# Use Aspell to suggest possible misspellings. This can be provided via
# PHP pspell module (http://www.php.net/manual/en/ref.pspell.php)
# or command line insterface to ASpell
-
+
# Should the suggestion mode be enabled?
$wgSphinxSuggestMode = false;
@@ -119,3 +124,15 @@
# Path to aspell location and language data files. Do not set if not sure.
$wgSphinxSearchPspellDictionaryDir = '';
+
+# How many matches searchd will keep in RAM while searching
+$wgSphinxSearch_maxmatches = 1000;
+
+# When to stop searching altogether (if not zero)
+$wgSphinxSearch_cutoff = 0;
+
+# Weights of individual indexed columns. This gives page titles extra weight
+$wgSphinxSearch_weights = array(
+ 'old_text' => 1,
+ 'page_title' => 100
+);
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs