Legoktm has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/399365 )
Change subject: Support multiple search profiles ...................................................................... Support multiple search profiles * search/ -> MediaWiki core plus extensions and skins * extensions/ -> extensions * skins/ -> skins * things -> extensions and skins Internally we run four hound instances, one for each search profile. Then we have a flask application proxy requests to each one depending upon the URL path. We also use the application to insert some of our HTML into the main index page. Please don't hurt me. Change-Id: I91ff2b12b859ed177724a22e3909823b9b5343a6 --- M .gitignore A README A app.py A hound_proxy.service M start.sh M write_config.py 6 files changed, 158 insertions(+), 32 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/labs/codesearch refs/changes/65/399365/1 diff --git a/.gitignore b/.gitignore index c207b39..826ce0b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ -config.json -data - +search +extensions +skins +things diff --git a/README b/README new file mode 100644 index 0000000..8fe6b52 --- /dev/null +++ b/README @@ -0,0 +1,5 @@ +MediaWiki code search! + +Documentation coming. + +hound_proxy.service should be copied to /etc/systemd/system/ diff --git a/app.py b/app.py new file mode 100644 index 0000000..db44571 --- /dev/null +++ b/app.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Proxy requests to hound +Copyright (C) 2017 Kunal Mehta <[email protected]> + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. +""" + +from flask import Flask, Response, request, redirect, url_for + +import requests + +app = Flask(__name__) + +BACKENDS = { + 'search': 6080, # all + 'extensions': 6081, + 'skins': 6082, + 'things': 6083, +} + + +@app.route('/') +def homepage(): + return redirect(url_for('index', backend='search')) + + +@app.route('/<backend>/') +def index(backend): + if backend not in BACKENDS: + return 'invalid backend' + + def mangle(text): + header = '<h2 style="text-align: center;">MediaWiki code search</h2>' + return text.replace('<body>', '<body>' + header) + return proxy(backend, mangle=mangle) + + +@app.route('/<backend>/<path:path>') +def proxy(backend, path='', mangle=False): + if backend not in BACKENDS: + return 'invalid backend' + port = BACKENDS[backend] + r = requests.get( + 'http://localhost:%s/%s' % (port, path), + params=request.args + ) + excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection'] + headers = [(name, value) for (name, value) in r.raw.headers.items() + if name.lower() not in excluded_headers] + if mangle: + text = mangle(r.text) + else: + text = r.text + return Response( + text, + r.status_code, + headers + ) + + +if __name__ == '__main__': + app.run(debug=True) diff --git a/hound_proxy.service b/hound_proxy.service new file mode 100644 index 0000000..bf663bb --- /dev/null +++ b/hound_proxy.service @@ -0,0 +1,19 @@ +[Unit] +Description=hound proxy gunicorn daemon +After=network.target + +[Service] +PIDFile=/run/gunicorn/pid +User=www-data +Group=www-data +RuntimeDirectory=gunicorn +WorkingDirectory=/srv/codesearch +ExecStart=/usr/bin/gunicorn3 --pid /run/gunicorn/pid \ + -w 4 -b 127.0.0.1:3002 app:app +ExecReload=/bin/kill -s HUP $MAINPID +ExecStop=/bin/kill -s TERM $MAINPID +PrivateTmp=true + +[Install] +WantedBy=multi-user.target + diff --git 
a/start.sh b/start.sh index 6fa89f0..2aa8f8f 100755 --- a/start.sh +++ b/start.sh @@ -1,2 +1,5 @@ #!/bin/bash -docker run -d -p 6080:6080 --name hound -v $(pwd):/data etsy/hound +docker run -d -p 6080:6080 --name hound-search -v /srv/codesearch/search:/data etsy/hound +docker run -d -p 6081:6080 --name hound-extensions -v /srv/codesearch/extensions:/data etsy/hound +docker run -d -p 6082:6080 --name hound-skins -v /srv/codesearch/skins:/data etsy/hound +docker run -d -p 6083:6080 --name hound-things -v /srv/codesearch/things:/data etsy/hound diff --git a/write_config.py b/write_config.py old mode 100644 new mode 100755 index 25cb3c0..561c236 --- a/write_config.py +++ b/write_config.py @@ -17,16 +17,29 @@ along with this program. If not, see <https://www.gnu.org/licenses/>. """ +import functools import json +import os import requests # One hour POLL = 60 * 60 * 1000 -BASE = { - 'max-concurrent-indexers': 2, - 'dbpath': 'data', - 'repos': {} -} + + +@functools.lru_cache() +def get_extdist_repos(): + r = requests.get( + 'https://www.mediawiki.org/w/api.php', + params={ + "action": "query", + "format": "json", + 'formatversion': 2, + "list": "extdistrepos" + } + ) + r.raise_for_status() + + return r.json() def repo_info(gerrit_name): @@ -40,30 +53,41 @@ } -BASE['repos']['MediaWiki core'] = repo_info('mediawiki/core') - - -r = requests.get( - 'https://www.mediawiki.org/w/api.php', - params={ - "action": "query", - "format": "json", - 'formatversion': 2, - "list": "extdistrepos" +def make_conf(directory, core=True, exts=True, skins=True): + conf = { + 'max-concurrent-indexers': 2, + 'dbpath': 'data', + 'repos': {} } -) -r.raise_for_status() -data = r.json() -for ext in data['query']['extdistrepos']['extensions']: - BASE['repos']['Extension:%s' % ext] = repo_info( - 'mediawiki/extensions/%s' % ext - ) + if core: + conf['repos']['MediaWiki core'] = repo_info('mediawiki/core') -for skin in data['query']['extdistrepos']['skins']: - BASE['repos']['Skin:%s' % skin] = 
repo_info( - 'mediawiki/skins/%s' % skin - ) + data = get_extdist_repos() + if exts: + for ext in data['query']['extdistrepos']['extensions']: + conf['repos']['Extension:%s' % ext] = repo_info( + 'mediawiki/extensions/%s' % ext + ) -with open('config.json', 'w') as f: - json.dump(BASE, f, indent='\t') + if skins: + for skin in data['query']['extdistrepos']['skins']: + conf['repos']['Skin:%s' % skin] = repo_info( + 'mediawiki/skins/%s' % skin + ) + + if not os.path.isdir(directory): + os.mkdir(directory) + with open(os.path.join(directory, 'config.json'), 'w') as f: + json.dump(conf, f, indent='\t') + + +def main(): + make_conf('search') + make_conf('extensions', core=False, skins=False) + make_conf('skins', core=False, exts=False) + make_conf('things', core=False) + + +if __name__ == '__main__': + main() -- To view, visit https://gerrit.wikimedia.org/r/399365 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I91ff2b12b859ed177724a22e3909823b9b5343a6 Gerrit-PatchSet: 1 Gerrit-Project: labs/codesearch Gerrit-Branch: master Gerrit-Owner: Legoktm <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
