# HG changeset patch # User Gregory Szorc <gregory.sz...@gmail.com> # Date 1491021501 25200 # Fri Mar 31 21:38:21 2017 -0700 # Node ID 0b8be3d244585f5a2874821418fce41bf7631f6c # Parent 4cec8e88d09775ee6478e307e9dde94af5b9fcfd hgweb: introduce staticimmutable web command
Currently, hgweb serves HTTP responses with an Etag header whose value is "W/<mtime>" where <mtime> is the modification time of the repo being accessed. The "W" means it is a "Weak validator" - something that is an approximation of content but not an identifier of unique content. Assuming the client is smart and can cache responses (like web browsers), a subsequent HTTP request for this URL will send an If-None-Match request header with the value from the previous Etag response header. hgweb will compare the INM header against what it would serve for Etag and if they match, the server fast issues an HTTP 304 Not Modified to prevent serving data to the client. This is a cache hit and is better than no cache hit. But there is overhead from the client sending the HTTP request and waiting for the 304 response. Furthermore, the "weak validator" isn't precise and can lead to weirdness. For example, https://hg.mozilla.org/ consists of a pool of servers behind a load balancer. Each server has an independent clone of repositories (there is no shared filesystem). Therefore, the mtimes on files within repositories may be different. This can result in the Etag value differing between servers. This can result in HTTP 304 not being issued and an HTTP 200 being served instead. And this translates to longer page loads. This isn't a theoretical problem: HTTP requests for static assets against hg.mozilla.org result in HTTP 200 instead of HTTP 304 most of the time. There are more effective ways to perform content caching with HTTP. One trick is to use "immutable" URLs. Essentially, the content at a URL is constant for the lifetime of the URL. When this URL is served, the HTTP response basically says "cache me forever." There's no conditional HTTP request on next access: if the client has that URL cached, it just uses it no questions asked. This avoids the network round trip and can result in drastically faster page loads. This commit introduces a new web command for processing requests for "immutable" static assets. Where the URL path pattern for a "static" web command is simply <path>, the URL path pattern for a "staticimmutable" web command is "<hash>/<path>" where <hash> is derived from content. This makes <hash> a "strong" validator in the HTTP RFC sense and allows us to leverage aggressive caching techniques. Strictly speaking, the order in the URL violates REST best practices: ideally <hash> would come after <path>. However, a lot of tools (including browser devtools) use the final path component to label a resource. A hash is not a useful label. So, we invert the order so the final URL path component is friendly. This makes no difference to caching behavior. But it will upset REST zealots in their ivory towers. To them, I saw practically wins over purity. The behavior of "staticimmutable" after this commit is basically the same as "static." Future commits will implement the content hashing and caching features. diff --git a/mercurial/hgweb/common.py b/mercurial/hgweb/common.py --- a/mercurial/hgweb/common.py +++ b/mercurial/hgweb/common.py @@ -10,6 +10,7 @@ from __future__ import absolute_import import base64 import errno +import hashlib import mimetypes import os import uuid @@ -146,7 +147,7 @@ def ispathsafe(path): return True -def staticfile(directory, fname, req): +def staticfile(directory, fname, req, immutablehash=None): """return a file inside directory with guessed Content-Type header fname always uses '/' as directory separator and isn't allowed to @@ -171,6 +172,15 @@ def staticfile(directory, fname, req): with open(path, 'rb') as fh: data = fh.read() + # Immutable request but the hash does not match found content. + # This should not happen. In the rare case it does, 404 the request + # because the requested content does not match what we have. + if immutablehash: + hash = hashlib.sha1(data).hexdigest() + + if hash != immutablehash: + raise ErrorResponse(HTTP_NOT_FOUND) + req.respond(HTTP_OK, ct, body=data) except TypeError: raise ErrorResponse(HTTP_SERVER_ERROR, 'illegal filename') diff --git a/mercurial/hgweb/hgweb_mod.py b/mercurial/hgweb/hgweb_mod.py --- a/mercurial/hgweb/hgweb_mod.py +++ b/mercurial/hgweb/hgweb_mod.py @@ -389,7 +389,7 @@ class hgweb(object): if util.safehasattr(webcommands, cmd): req.form['cmd'] = [cmd] - if cmd == 'static': + if cmd in ('static', 'staticimmutable'): req.form['file'] = ['/'.join(args)] else: if args and args[0]: @@ -418,7 +418,7 @@ class hgweb(object): ctype = templater.stringify(ctype) # check read permissions non-static content - if cmd != 'static': + if cmd not in ('static', 'staticimmutable'): self.check_perm(rctx, req, None) if cmd == '': diff --git a/mercurial/hgweb/webcommands.py b/mercurial/hgweb/webcommands.py --- a/mercurial/hgweb/webcommands.py +++ b/mercurial/hgweb/webcommands.py @@ -1139,6 +1139,23 @@ def static(web, req, tmpl): staticfile(static, fname, req) return [] +@webcommand('staticimmutable') +def staticimmutable(web, req, tmpl): + parts = req.form['file'][0].split('/', 1) + + # Requested path is <path>/<hash> + if len(parts) != 2: + raise ErrorResponse(HTTP_NOT_FOUND) + + hash, fname = parts + + tp = web.templatepath or templater.templatepaths() + if isinstance(tp, str): + tp = [tp] + static = [os.path.join(p, 'static') for p in tp] + staticfile(static, fname, req, immutablehash=hash) + return [] + @webcommand('graph') def graph(web, req, tmpl): """ diff --git a/tests/test-hgweb.t b/tests/test-hgweb.t --- a/tests/test-hgweb.t +++ b/tests/test-hgweb.t @@ -106,6 +106,136 @@ should give a 404 - static file that doe [1] +staticimmutable without all path components should give 404 + + $ get-with-headers.py localhost:$HGPORT 'staticimmutable/bogus' + 404 Not Found + + <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> + <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US"> + <head> + <link rel="icon" href="/static/hgicon.png" type="image/png" /> + <meta name="robots" content="index, nofollow" /> + <link rel="stylesheet" href="/static/style-paper.css" type="text/css" /> + <script type="text/javascript" src="/static/mercurial.js"></script> + + <title>test: error</title> + </head> + <body> + + <div class="container"> + <div class="menu"> + <div class="logo"> + <a href="https://mercurial-scm.org/"> + <img src="/static/hglogo.png" width=75 height=90 border=0 alt="mercurial" /></a> + </div> + <ul> + <li><a href="/shortlog">log</a></li> + <li><a href="/graph">graph</a></li> + <li><a href="/tags">tags</a></li> + <li><a href="/bookmarks">bookmarks</a></li> + <li><a href="/branches">branches</a></li> + </ul> + <ul> + <li><a href="/help">help</a></li> + </ul> + </div> + + <div class="main"> + + <h2 class="breadcrumb"><a href="/">Mercurial</a> </h2> + <h3>error</h3> + + <form class="search" action="/log"> + + <p><input name="rev" id="search1" type="text" size="30"></p> + <div id="hint">Find changesets by keywords (author, files, the commit message), revision + number or hash, or <a href="/help/revsets">revset expression</a>.</div> + </form> + + <div class="description"> + <p> + An error occurred while processing your request: + </p> + <p> + Not Found + </p> + </div> + </div> + </div> + + + + </body> + </html> + + [1] + +staticimmutable with a bogus hash should give 404 + + $ get-with-headers.py localhost:$HGPORT 'staticimmutable/badhash/mercurial.js' + 404 Not Found + + <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> + <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US"> + <head> + <link rel="icon" href="/static/hgicon.png" type="image/png" /> + <meta name="robots" content="index, nofollow" /> + <link rel="stylesheet" href="/static/style-paper.css" type="text/css" /> + <script type="text/javascript" src="/static/mercurial.js"></script> + + <title>test: error</title> + </head> + <body> + + <div class="container"> + <div class="menu"> + <div class="logo"> + <a href="https://mercurial-scm.org/"> + <img src="/static/hglogo.png" width=75 height=90 border=0 alt="mercurial" /></a> + </div> + <ul> + <li><a href="/shortlog">log</a></li> + <li><a href="/graph">graph</a></li> + <li><a href="/tags">tags</a></li> + <li><a href="/bookmarks">bookmarks</a></li> + <li><a href="/branches">branches</a></li> + </ul> + <ul> + <li><a href="/help">help</a></li> + </ul> + </div> + + <div class="main"> + + <h2 class="breadcrumb"><a href="/">Mercurial</a> </h2> + <h3>error</h3> + + <form class="search" action="/log"> + + <p><input name="rev" id="search1" type="text" size="30"></p> + <div id="hint">Find changesets by keywords (author, files, the commit message), revision + number or hash, or <a href="/help/revsets">revset expression</a>.</div> + </form> + + <div class="description"> + <p> + An error occurred while processing your request: + </p> + <p> + Not Found + </p> + </div> + </div> + </div> + + + + </body> + </html> + + [1] + should give a 404 - bad revision $ get-with-headers.py localhost:$HGPORT 'file/spam/foo?style=raw' @@ -331,7 +461,7 @@ stop and restart Test the access/error files are opened in append mode $ $PYTHON -c "print len(file('access.log').readlines()), 'log lines written'" - 14 log lines written + 16 log lines written static file _______________________________________________ Mercurial-devel mailing list Mercurial-devel@mercurial-scm.org https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel