http://www.mediawiki.org/wiki/Special:Code/MediaWiki/73342

Revision: 73342
Author:   svemir
Date:     2010-09-19 21:24:13 +0000 (Sun, 19 Sep 2010)

Log Message:
-----------
initial support for new model: SphinxMWSearch (extends SearchEngine and uses 
standard MW search interface) - old model is still supported (at least until 
version 0.8)

Modified Paths:
--------------
    trunk/extensions/SphinxSearch/SphinxSearch.php

Added Paths:
-----------
    trunk/extensions/SphinxSearch/SphinxMWSearch.php

Added: trunk/extensions/SphinxSearch/SphinxMWSearch.php
===================================================================
--- trunk/extensions/SphinxSearch/SphinxMWSearch.php                            
(rev 0)
+++ trunk/extensions/SphinxSearch/SphinxMWSearch.php    2010-09-19 21:24:13 UTC 
(rev 73342)
@@ -0,0 +1,340 @@
+<?php
+
+/**
+ * Class file for the SphinxMWSearch extension
+ *
+ * http://www.mediawiki.org/wiki/Extension:SphinxSearch
+ *
+ * Released under GNU General Public License (see 
http://www.fsf.org/licenses/gpl.html)
+ *
+ * @file
+ * @ingroup Extensions
+ * @author Svemir Brkic <[email protected]>
+ */
+
+ /**
+  * Search backend that sends queries to a Sphinx searchd instance through
+  * SphinxClient while keeping the standard SearchEngine method interface.
+  */
+ class SphinxMWSearch extends SearchEngine {
+
+       /** Values for the 'category' filter that hits must match (hooks may fill this) */
+       public $categories = array();
+       /** Values for the 'category' filter that hits must NOT match */
+       public $exc_categories = array();
+       /** Database handle given to the constructor; passed on to the result set */
+       public $db;
+       /** SphinxClient built by the first non-blank search and reused afterwards */
+       public $sphinx_client = null;
+
+       /**
+        * @param $db database handle later used to load the matching page rows
+        */
+       public function __construct( $db ) {
+               $this->db = $db;
+       }
+
+       /**
+        * Perform a full text search query and return a result set.
+        *
+        * @param string $term - Raw search term
+        * @return SphinxMWSearchResultSet, or null if the term is blank or the
+        *         Sphinx query failed
+        * @access public
+        */
+       public function searchText( $term ) {
+               global $wgSphinxSearch_index_list;
+
+               if ( !$this->sphinx_client ) {
+                       // $term is passed by reference: hooks may rewrite it
+                       $this->sphinx_client = $this->prepareSphinxClient( $term );
+               }
+               if ( !$this->sphinx_client ) {
+                       return null;
+               }
+
+               $this->searchTerms = $term;
+               $resultSet = $this->sphinx_client->Query(
+                       $this->escapeQuery( $term ),
+                       $wgSphinxSearch_index_list
+               );
+               if ( $resultSet === false ) {
+                       return null;
+               }
+
+               // the result set gets the readable term, not the escaped query
+               return new SphinxMWSearchResultSet( $resultSet, $term, $this->sphinx_client, $this->db );
+       }
+
+       /**
+        * Backslash-escape characters that would break the Sphinx query:
+        * always '/', plus parentheses, brackets or double quotes whenever
+        * they are unbalanced. Delimiters the user already escaped are left
+        * exactly as typed.
+        *
+        * @param string $term - search term
+        * @return string: term that is safe to hand to SphinxClient::Query()
+        */
+       protected function escapeQuery( $term ) {
+               $escape = '/';
+               // opening delimiter => closing delimiter ('' if it closes itself)
+               $delims = array(
+                       '(' => ')',
+                       '[' => ']',
+                       '"' => '',
+               );
+               // temporarily replace already escaped characters
+               $placeholders = array(
+                       '\\(' => '_PLC_O_PAR_',
+                       '\\)' => '_PLC_C_PAR_',
+                       '\\[' => '_PLC_O_BRA_',
+                       '\\]' => '_PLC_C_BRA_',
+                       '\\"' => '_PLC_QUOTE_',
+               );
+               $masked = str_replace( array_keys( $placeholders ), $placeholders, $term );
+               foreach ( $delims as $open => $close ) {
+                       $open_cnt = substr_count( $masked, $open );
+                       if ( $close ) {
+                               // if counts do not match, escape them all
+                               if ( $open_cnt != substr_count( $masked, $close ) ) {
+                                       $escape .= $open . $close;
+                               }
+                       } elseif ( $open_cnt % 2 == 1 ) {
+                               // if there is no closing symbol, count should be even
+                               $escape .= $open;
+                       }
+               }
+               // Escape BEFORE restoring the placeholders. Doing it afterwards
+               // would turn a user-supplied "\(" into "\\(", i.e. a literal
+               // backslash followed by an unescaped parenthesis.
+               $masked = addcslashes( $masked, $escape );
+               return str_replace( $placeholders, array_keys( $placeholders ), $masked );
+       }
+
+       /**
+        * We do a weighted title/body search, no need to return titles separately
+        *
+        * @param string $term - Raw search term
+        * @return null always
+        * @access public
+        */
+       public function searchTitle( $term ) {
+               return null;
+       }
+
+       /**
+        * Build a SphinxClient configured from the $wgSphinxSearch_* globals and
+        * from the filters/limits set on this engine. Runs the
+        * 'SphinxSearchBeforeResults' and 'SphinxSearchBeforeQuery' hooks.
+        *
+        * @param string $term - search term, by reference so hooks can change it
+        * @return SphinxClient: ready to run or false if term is empty
+        */
+       public function prepareSphinxClient( &$term ) {
+               global $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby, $wgSphinxSearch_host,
+                       $wgSphinxSearch_port, $wgSphinxSearch_index_weights,
+                       $wgSphinxSearch_mode, $wgSphinxSearch_maxmatches,
+                       $wgSphinxSearch_cutoff, $wgSphinxSearch_weights;
+
+               // don't do anything for blank searches
+               if ( trim( $term ) === '' ) {
+                       return false;
+               }
+
+               wfRunHooks( 'SphinxSearchBeforeResults', array(
+                       &$term,
+                       &$this->offset,
+                       &$this->namespaces,
+                       &$this->categories,
+                       &$this->exc_categories
+               ) );
+
+               $cl = new SphinxClient();
+
+               // setup the options for searching
+               if ( isset( $wgSphinxSearch_host ) && isset( $wgSphinxSearch_port ) ) {
+                       $cl->SetServer( $wgSphinxSearch_host, $wgSphinxSearch_port );
+               }
+               // is_array() guards: count() on a non-array raises a warning or error
+               if ( is_array( $wgSphinxSearch_weights ) && count( $wgSphinxSearch_weights ) ) {
+                       $cl->SetFieldWeights( $wgSphinxSearch_weights );
+               }
+               if ( is_array( $wgSphinxSearch_index_weights ) ) {
+                       $cl->SetIndexWeights( $wgSphinxSearch_index_weights );
+               }
+               if ( isset( $wgSphinxSearch_mode ) ) {
+                       $cl->SetMatchMode( $wgSphinxSearch_mode );
+               }
+               if ( is_array( $this->namespaces ) && count( $this->namespaces ) ) {
+                       $cl->SetFilter( 'page_namespace', $this->namespaces );
+               }
+               if ( !$this->showRedirects ) {
+                       $cl->SetFilter( 'page_is_redirect', array( 0 ) );
+               }
+               if ( is_array( $this->categories ) && count( $this->categories ) ) {
+                       $cl->SetFilter( 'category', $this->categories );
+               }
+               if ( is_array( $this->exc_categories ) && count( $this->exc_categories ) ) {
+                       // third argument true = exclude documents matching the values
+                       $cl->SetFilter( 'category', $this->exc_categories, true );
+               }
+               $cl->SetSortMode( $wgSphinxSearch_sortmode, $wgSphinxSearch_sortby );
+               $cl->SetLimits(
+                       $this->offset,
+                       $this->limit,
+                       $wgSphinxSearch_maxmatches,
+                       $wgSphinxSearch_cutoff
+               );
+
+               wfRunHooks( 'SphinxSearchBeforeQuery', array( &$term, &$cl ) );
+
+               return $cl;
+       }
+
+       /**
+        * @return Boolean: can we list/unlist redirects
+        */
+       public function acceptListRedirects() {
+               return true;
+       }
+
+       /**
+        * @return String: allowed query characters
+        */
+       public static function legalSearchChars() {
+               return "A-Za-z_'./\"!~0-9\\x80-\\xFF\\-";
+       }
+
+ }
+
+/**
+ * Result set holding the page rows for the documents Sphinx matched,
+ * in the order Sphinx returned them.
+ *
+ * @ingroup Search
+ */
+class SphinxMWSearchResultSet extends SearchResultSet {
+       /** Index of the row next() will hand out */
+       public $mNdx = 0;
+       /** SphinxClient that produced the matches; passed on to each result */
+       public $sphinx_client = null;
+       /** Suggested page title found by hasSuggestion(), '' if none */
+       public $mSuggestion = '';
+       /** Page rows (page_id, page_title, page_namespace) */
+       public $mResultSet = array();
+       /** Words of the search term */
+       public $mTerms = array();
+
+       /**
+        * @param $resultSet Array: raw result of SphinxClient::Query()
+        * @param $terms String: search term the query was built from
+        * @param $sphinx_client SphinxClient used for the query
+        * @param $dbr database handle used to load the page rows
+        */
+       public function __construct( $resultSet, $terms, $sphinx_client, $dbr ) {
+               $this->sphinx_client = $sphinx_client;
+               $this->mResultSet = array();
+
+               // 'matches' may be absent when nothing was found, hence isset()
+               if ( is_array( $resultSet )
+                       && isset( $resultSet['matches'] )
+                       && is_array( $resultSet['matches'] )
+                       && count( $resultSet['matches'] )
+               ) {
+                       // matches are keyed by document id, which is the page_id
+                       $ids = array_keys( $resultSet['matches'] );
+                       // one query for all ids instead of one query per match
+                       $res = $dbr->select(
+                               'page',
+                               array( 'page_id', 'page_title', 'page_namespace' ),
+                               array( 'page_id' => $ids ),
+                               __METHOD__,
+                               array()
+                       );
+                       $rows = array();
+                       while ( $row = $dbr->fetchObject( $res ) ) {
+                               $rows[$row->page_id] = $row;
+                       }
+                       // keep the order Sphinx returned; skip ids without a page row
+                       foreach ( $ids as $id ) {
+                               if ( isset( $rows[$id] ) ) {
+                                       $this->mResultSet[] = $rows[$id];
+                               }
+                       }
+               }
+               $this->mNdx = 0;
+
+               // Split on anything that is not a letter, digit or underscore.
+               // The /u modifier keeps multi-byte words intact and
+               // PREG_SPLIT_NO_EMPTY avoids empty terms (e.g. for a quoted term).
+               $words = preg_split( '/[^\p{L}\p{N}_]+/u', $terms, -1, PREG_SPLIT_NO_EMPTY );
+               if ( $words === false ) {
+                       // term is not valid UTF-8: fall back to a byte-wise split
+                       $words = preg_split( '/\W+/', $terms, -1, PREG_SPLIT_NO_EMPTY );
+               }
+               $this->mTerms = is_array( $words ) ? $words : array();
+       }
+
+       /**
+        * Some search modes return a suggested alternate term if there are
+        * no exact hits. Returns true if there is one on this set.
+        *
+        * Only active when $wgSphinxSuggestMode is set. Looks up one page title
+        * that SOUNDS LIKE the first search word and stores it in mSuggestion.
+        *
+        * @return Boolean
+        */
+       public function hasSuggestion() {
+               global $wgSphinxSuggestMode;
+
+               if ( !$wgSphinxSuggestMode || !isset( $this->mTerms[0] ) || $this->mTerms[0] === '' ) {
+                       return false;
+               }
+
+               // Initial (weak) implementation - will be replaced
+               $dbr = wfGetDB( DB_SLAVE );
+               $res = $dbr->select(
+                       array( 'page' ),
+                       array( 'page_title' ),
+                       array( "page_title SOUNDS LIKE " . $dbr->addQuotes( $this->mTerms[0] ) ),
+                       __METHOD__,
+                       array(
+                               'ORDER BY' => 'page_counter desc',
+                               'LIMIT' => 1
+                       )
+               );
+               $suggestion = $dbr->fetchObject( $res );
+               if ( !$suggestion ) {
+                       // no row matched: fetchObject() gave us false, not an object
+                       return false;
+               }
+               $this->mSuggestion = $suggestion->page_title;
+               return (bool)$this->mSuggestion;
+       }
+
+       /**
+        * @return String: suggested query, '' if none
+        */
+       public function getSuggestionQuery() {
+               return $this->mSuggestion;
+       }
+
+       /**
+        * @return String: suggested query (not highlighted here), '' if none
+        */
+       public function getSuggestionSnippet() {
+               return $this->mSuggestion;
+       }
+
+       /**
+        * @return Array: search terms
+        */
+       public function termMatches() {
+               return $this->mTerms;
+       }
+
+       /**
+        * @return Integer: number of results
+        */
+       public function numRows() {
+               return count( $this->mResultSet );
+       }
+
+       /**
+        * @return SphinxMWSearchResult: next result, false if none
+        */
+       public function next() {
+               if ( isset( $this->mResultSet[$this->mNdx] ) ) {
+                       $row = $this->mResultSet[$this->mNdx];
+                       ++$this->mNdx;
+                       return new SphinxMWSearchResult( $row, $this->sphinx_client );
+               } else {
+                       return false;
+               }
+       }
+
+       /**
+        * Drop the loaded rows. The property is emptied rather than unset so
+        * that numRows() and next() stay safe to call afterwards.
+        */
+       public function free() {
+               $this->mResultSet = array();
+       }
+
+}
+
+/**
+ * Single search hit that can build its text snippet with Sphinx'
+ * BuildExcerpts() instead of the default MediaWiki highlighter.
+ */
+class SphinxMWSearchResult extends SearchResult {
+
+       /**
+        * Plain-text markers Sphinx puts around each match. They are swapped
+        * for the real highlight markup only after the excerpt was escaped.
+        */
+       const HL_OPEN = '@@SPHINX_HL_OPEN@@';
+       const HL_CLOSE = '@@SPHINX_HL_CLOSE@@';
+
+       /** SphinxClient used to build excerpts */
+       public $sphinx_client = null;
+
+       /**
+        * @param $row page row this result is built from
+        * @param $sphinx_client SphinxClient that ran the query
+        */
+       public function __construct( $row, $sphinx_client ) {
+               $this->sphinx_client = $sphinx_client;
+               parent::__construct( $row );
+       }
+
+       /**
+        * Uses the parent implementation when $wgSphinxSearchMWHighlighter is
+        * set, otherwise asks Sphinx for excerpts of the page text.
+        *
+        * @param $terms Array: terms to highlight
+        * @return String: highlighted text snippet, or the
+        *         'sphinxSearchWarning' message if Sphinx returned no excerpts
+        */
+       public function getTextSnippet( $terms ) {
+               global $wgUser, $wgSphinxSearchMWHighlighter, $wgSphinxSearch_index;
+
+               if ( $wgSphinxSearchMWHighlighter ) {
+                       return parent::getTextSnippet( $terms );
+               }
+
+               $this->initText();
+
+               list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
+
+               $excerpts_opt = array(
+                       "before_match"    => self::HL_OPEN,
+                       "after_match"     => self::HL_CLOSE,
+                       "chunk_separator" => " ... ",
+                       "limit"           => $contextlines * $contextchars,
+                       "around"          => $contextchars
+               );
+
+               $excerpts = $this->sphinx_client->BuildExcerpts(
+                       array( $this->mText ),
+                       $wgSphinxSearch_index,
+                       implode( ' ', $terms ),
+                       $excerpts_opt
+               );
+
+               if ( !is_array( $excerpts ) ) {
+                       return wfMsg( 'sphinxSearchWarning', $this->sphinx_client->GetLastError() );
+               }
+
+               $ret = '';
+               foreach ( $excerpts as $entry ) {
+                       // Drop every tag found in the page text. Allowing <span>/<br>
+                       // here would let their attributes (e.g. onmouseover) through,
+                       // because strip_tags() does not touch attributes of allowed tags.
+                       // Then remove some wiki markup.
+                       $entry = preg_replace(
+                               '/([\[\]\{\}\*\#\|\!]+|==+)/',
+                               ' ',
+                               strip_tags( $entry )
+                       );
+                       // the page text is user-editable: escape it before output
+                       $entry = htmlspecialchars( $entry, ENT_QUOTES, 'UTF-8' );
+                       // only now insert the highlight markup we trust
+                       $entry = str_replace(
+                               array( self::HL_OPEN, self::HL_CLOSE ),
+                               array( "<span class='searchmatch'>", "</span>" ),
+                               $entry
+                       );
+                       $ret .= "<div style='margin: 0.2em 1em 0.2em 1em;'>$entry</div>\n";
+               }
+               return $ret;
+       }
+
+}
\ No newline at end of file

Modified: trunk/extensions/SphinxSearch/SphinxSearch.php
===================================================================
--- trunk/extensions/SphinxSearch/SphinxSearch.php      2010-09-19 21:22:13 UTC 
(rev 73341)
+++ trunk/extensions/SphinxSearch/SphinxSearch.php      2010-09-19 21:24:13 UTC 
(rev 73342)
@@ -11,7 +11,7 @@
 
 $wgExtensionCredits['specialpage'][] = array(
        'path'           => __FILE__,
-       'version'        => '0.7.1',
+       'version'        => '0.7.2',
        'name'           => 'SphinxSearch',
        'author'         => array( 'Svemir Brkic', 'Paul Grinberg' ),
        'email'          => 'svemir at deveblog dot com, gri6507 at yahoo dot 
com',
@@ -21,29 +21,35 @@
 
 $dir = dirname( __FILE__ ) . '/';
 
-$wgAutoloadClasses['SphinxSearch'] = $dir . 'SphinxSearch_body.php';
 $wgExtensionMessagesFiles['SphinxSearch'] = $dir . 'SphinxSearch.i18n.php';
-$wgExtensionAliasesFiles['SphinxSearch'] = $dir . 'SphinxSearch.alias.php';
 
 # To completely disable the default search and replace it with SphinxSearch,
 # set this BEFORE including SphinxSearch.php in LocalSettings.php
 # $wgSearchType = 'SphinxSearch';
-
-if ( $wgSearchType == 'SphinxSearch' ) {
-       $wgDisableInternalSearch = true;
-       $wgDisableSearchUpdate = true;
-       $wgSpecialPages['Search'] = 'SphinxSearch';
+# To use the new approach (added in 0.7.2) set it to SphinxMWSearch
+if ( $wgSearchType == 'SphinxMWSearch' ) {
+       $wgAutoloadClasses['SphinxMWSearch'] = $dir . 'SphinxMWSearch.php';
 } else {
-       $wgSpecialPages['SphinxSearch'] = 'SphinxSearch';
+       # The SphinxSearch class backs the special page in both branches below,
+       # so its autoload entry has to be registered in either case
+       $wgAutoloadClasses['SphinxSearch'] = $dir . 'SphinxSearch_body.php';
+       if ( $wgSearchType == 'SphinxSearch' ) {
+               # replace the default search page completely
+               $wgDisableInternalSearch = true;
+               $wgDisableSearchUpdate = true;
+               $wgSpecialPages['Search'] = 'SphinxSearch';
+       } else {
+               # keep the default search and offer Special:SphinxSearch next to it
+               $wgExtensionAliasesFiles['SphinxSearch'] = $dir . 'SphinxSearch.alias.php';
+               $wgSpecialPages['SphinxSearch'] = 'SphinxSearch';
+       }
 }
 
 # this assumes you have copied sphinxapi.php from your Sphinx
 # installation folder to your SphinxSearch extension folder
+# not needed if you install http://pecl.php.net/package/sphinx
 if ( !class_exists( 'SphinxClient' ) ) {
        require_once ( $dir . "sphinxapi.php" );
 }
 
-# Host and port on which searchd deamon is tunning
+# Host and port on which searchd daemon is running
 $wgSphinxSearch_host = 'localhost';
 $wgSphinxSearch_port = 9312;
 
@@ -70,44 +76,43 @@
 $wgSphinxSearch_sortmode = SPH_SORT_RELEVANCE;
 $wgSphinxSearch_sortby = '';
 
-# By default, search will return articles that match any of the words in the 
search
-# To change that to require all words to match by default, set the following 
to true
-$wgSphinxMatchAll = false;
+if ( $wgSearchType == 'SphinxMWSearch' ) {
+       # Following settings apply only in the new search model
 
-# Number of matches to display at once
-$wgSphinxSearch_matches = 10;
-# How many matches searchd will keep in RAM while searching
-$wgSphinxSearch_maxmatches = 1000;
-# When to stop searching all together (if not zero)
-$wgSphinxSearch_cutoff = 0;
+       # Set to true to use MW's default search snippets and highlighting
+       $wgSphinxSearchMWHighlighter = false;
+} else {
+       # Following settings apply only in the old search model
 
-# Weights of individual indexed columns. This gives page titles extra weight
-$wgSphinxSearch_weights = array(
-       'old_text' => 1,
-       'page_title' => 100
-);
+       # By default, search will return articles that match any of the words 
in the search
+       # To change that to require all words to match by default, set the 
following to true
+       $wgSphinxMatchAll = false;
+       
+       # Number of matches to display at once
+       $wgSphinxSearch_matches = 10;
 
-# To enable hierarchical category search, specify the top category of your 
hierarchy
-$wgSphinxTopSearchableCategory = '';
+       # To enable hierarchical category search, specify the top category of 
your hierarchy
+       $wgSphinxTopSearchableCategory = '';
+       
+       # This will fetch sub-categories as parent categories are checked
+       # Requires $wgUseAjax to be true
+       $wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren';
+       
+       # Allow excluding selected categories when filtering
+       $wgUseExcludes = false;
 
-# This will fetch sub-categories as parent categories are checked
-# Requires $wgUseAjax to be true
-$wgAjaxExportList[] = 'SphinxSearch::ajaxGetCategoryChildren';
+       # Web-accessible path to the extension's folder
+       $wgSphinxSearchExtPath = $wgScriptPath . '/extensions/SphinxSearch';
+       
+       # Web-accessible path to the folder with SphinxSearch.js file (if 
different from $wgSphinxSearchExtPath)
+       $wgSphinxSearchJSPath = '';
+}
 
-# EXPERIMENTAL: allow excluding selected categories when filtering
-$wgUseExcludes = false;
-
-# Web-accessible path to the extension's folder
-$wgSphinxSearchExtPath = $wgScriptPath . '/extensions/SphinxSearch';
-
-# Web-accessible path to the folder with SphinxSearch.js file (if different 
from $wgSphinxSearchExtPath)
-$wgSphinxSearchJSPath = '';
-
 # #########################################################
 # Use Aspell to suggest possible misspellings. This can be provided via
 # PHP pspell module (http://www.php.net/manual/en/ref.pspell.php)
 # or command line insterface to ASpell
-
+       
 # Should the suggestion mode be enabled?
 $wgSphinxSuggestMode = false;
 
@@ -119,3 +124,15 @@
 
 # Path to aspell location and language data files. Do not set if not sure.
 $wgSphinxSearchPspellDictionaryDir = '';
+
+# How many matches searchd will keep in RAM while searching
+$wgSphinxSearch_maxmatches = 1000;
+
+# When to stop searching altogether (if not zero)
+$wgSphinxSearch_cutoff = 0;
+
+# Weights of individual indexed columns. This gives page titles extra weight
+$wgSphinxSearch_weights = array(
+       'old_text' => 1,
+       'page_title' => 100
+);



_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to