Legoktm has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/399365 )

Change subject: Support multiple search profiles
......................................................................

Support multiple search profiles

* search/ -> MediaWiki core plus extensions and skins
* extensions/ -> extensions
* skins/ -> skins
* things/ -> extensions and skins

Internally we run four hound instances, one for each search profile.
Then we have a Flask application that proxies requests to each one,
depending upon the URL path. We also use the application to insert some
of our HTML into the main index page. Please don't hurt me.

Change-Id: I91ff2b12b859ed177724a22e3909823b9b5343a6
---
M .gitignore
A README
A app.py
A hound_proxy.service
M start.sh
M write_config.py
6 files changed, 158 insertions(+), 32 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/labs/codesearch refs/changes/65/399365/1

diff --git a/.gitignore b/.gitignore
index c207b39..826ce0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-config.json
-data
-
+search
+extensions
+skins
+things
diff --git a/README b/README
new file mode 100644
index 0000000..8fe6b52
--- /dev/null
+++ b/README
@@ -0,0 +1,5 @@
+MediaWiki code search!
+
+Documentation coming.
+
+hound_proxy.service should be copied to /etc/systemd/system/
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..db44571
--- /dev/null
+++ b/app.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""
+Proxy requests to hound
+Copyright (C) 2017 Kunal Mehta <[email protected]>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""
+
+from flask import Flask, Response, request, redirect, url_for
+
+import requests
+
+app = Flask(__name__)
+
+BACKENDS = {
+    'search': 6080,  # all
+    'extensions': 6081,
+    'skins': 6082,
+    'things': 6083,
+}
+
+
+@app.route('/')
+def homepage():
+    return redirect(url_for('index', backend='search'))
+
+
+@app.route('/<backend>/')
+def index(backend):
+    if backend not in BACKENDS:
+        return 'invalid backend'
+
+    def mangle(text):
+        header = '<h2 style="text-align: center;">MediaWiki code search</h2>'
+        return text.replace('<body>', '<body>' + header)
+    return proxy(backend, mangle=mangle)
+
+
+@app.route('/<backend>/<path:path>')
+def proxy(backend, path='', mangle=False):
+    if backend not in BACKENDS:
+        return 'invalid backend'
+    port = BACKENDS[backend]
+    r = requests.get(
+        'http://localhost:%s/%s' % (port, path),
+        params=request.args
+    )
+    excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
+    headers = [(name, value) for (name, value) in r.raw.headers.items()
+               if name.lower() not in excluded_headers]
+    if mangle:
+        text = mangle(r.text)
+    else:
+        text = r.text
+    return Response(
+        text,
+        r.status_code,
+        headers
+    )
+
+
+if __name__ == '__main__':
+    app.run(debug=True)
diff --git a/hound_proxy.service b/hound_proxy.service
new file mode 100644
index 0000000..bf663bb
--- /dev/null
+++ b/hound_proxy.service
@@ -0,0 +1,19 @@
+[Unit]
+Description=hound proxy gunicorn daemon
+After=network.target
+
+[Service]
+PIDFile=/run/gunicorn/pid
+User=www-data
+Group=www-data
+RuntimeDirectory=gunicorn
+WorkingDirectory=/srv/codesearch
+ExecStart=/usr/bin/gunicorn3 --pid /run/gunicorn/pid   \
+          -w 4 -b 127.0.0.1:3002 app:app
+ExecReload=/bin/kill -s HUP $MAINPID
+ExecStop=/bin/kill -s TERM $MAINPID
+PrivateTmp=true
+
+[Install]
+WantedBy=multi-user.target
+
diff --git a/start.sh b/start.sh
index 6fa89f0..2aa8f8f 100755
--- a/start.sh
+++ b/start.sh
@@ -1,2 +1,5 @@
 #!/bin/bash
-docker run -d -p 6080:6080 --name hound -v $(pwd):/data etsy/hound
+docker run -d -p 6080:6080 --name hound-search -v /srv/codesearch/search:/data etsy/hound
+docker run -d -p 6081:6080 --name hound-extensions -v /srv/codesearch/extensions:/data etsy/hound
+docker run -d -p 6082:6080 --name hound-skins -v /srv/codesearch/skins:/data etsy/hound
+docker run -d -p 6083:6080 --name hound-things -v /srv/codesearch/things:/data etsy/hound
diff --git a/write_config.py b/write_config.py
old mode 100644
new mode 100755
index 25cb3c0..561c236
--- a/write_config.py
+++ b/write_config.py
@@ -17,16 +17,29 @@
 along with this program.  If not, see <https://www.gnu.org/licenses/>.
 """
 
+import functools
 import json
+import os
 import requests
 
 # One hour
 POLL = 60 * 60 * 1000
-BASE = {
-    'max-concurrent-indexers': 2,
-    'dbpath': 'data',
-    'repos': {}
-}
+
+
+@functools.lru_cache()
+def get_extdist_repos():
+    r = requests.get(
+        'https://www.mediawiki.org/w/api.php',
+        params={
+            "action": "query",
+            "format": "json",
+            'formatversion': 2,
+            "list": "extdistrepos"
+        }
+    )
+    r.raise_for_status()
+
+    return r.json()
 
 
 def repo_info(gerrit_name):
@@ -40,30 +53,41 @@
     }
 
 
-BASE['repos']['MediaWiki core'] = repo_info('mediawiki/core')
-
-
-r = requests.get(
-    'https://www.mediawiki.org/w/api.php',
-    params={
-        "action": "query",
-        "format": "json",
-        'formatversion': 2,
-        "list": "extdistrepos"
+def make_conf(directory, core=True, exts=True, skins=True):
+    conf = {
+        'max-concurrent-indexers': 2,
+        'dbpath': 'data',
+        'repos': {}
     }
-)
-r.raise_for_status()
 
-data = r.json()
-for ext in data['query']['extdistrepos']['extensions']:
-    BASE['repos']['Extension:%s' % ext] = repo_info(
-        'mediawiki/extensions/%s' % ext
-    )
+    if core:
+        conf['repos']['MediaWiki core'] = repo_info('mediawiki/core')
 
-for skin in data['query']['extdistrepos']['skins']:
-    BASE['repos']['Skin:%s' % skin] = repo_info(
-        'mediawiki/skins/%s' % skin
-    )
+    data = get_extdist_repos()
+    if exts:
+        for ext in data['query']['extdistrepos']['extensions']:
+            conf['repos']['Extension:%s' % ext] = repo_info(
+                'mediawiki/extensions/%s' % ext
+            )
 
-with open('config.json', 'w') as f:
-    json.dump(BASE, f, indent='\t')
+    if skins:
+        for skin in data['query']['extdistrepos']['skins']:
+            conf['repos']['Skin:%s' % skin] = repo_info(
+                'mediawiki/skins/%s' % skin
+            )
+
+    if not os.path.isdir(directory):
+        os.mkdir(directory)
+    with open(os.path.join(directory, 'config.json'), 'w') as f:
+        json.dump(conf, f, indent='\t')
+
+
+def main():
+    make_conf('search')
+    make_conf('extensions', core=False, skins=False)
+    make_conf('skins', core=False, exts=False)
+    make_conf('things', core=False)
+
+
+if __name__ == '__main__':
+    main()

-- 
To view, visit https://gerrit.wikimedia.org/r/399365
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I91ff2b12b859ed177724a22e3909823b9b5343a6
Gerrit-PatchSet: 1
Gerrit-Project: labs/codesearch
Gerrit-Branch: master
Gerrit-Owner: Legoktm <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to