D2771: hgweb: expose input stream on parsed WSGI request object

2018-03-12 Thread indygreg (Gregory Szorc)
This revision was automatically updated to reflect the committed changes.
Closed by commit rHGda4e2f87167d: hgweb: expose input stream on parsed WSGI 
request object (authored by indygreg, committed by ).

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2771?vs=6833&id=6909

REVISION DETAIL
  https://phab.mercurial-scm.org/D2771

AFFECTED FILES
  mercurial/hgweb/hgwebdir_mod.py
  mercurial/hgweb/request.py
  mercurial/wireprotoserver.py

CHANGE DETAILS

diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -83,7 +83,7 @@
 postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
 if postlen:
 args.update(urlreq.parseqs(
-self._wsgireq.inp.read(postlen), keep_blank_values=True))
+self._req.bodyfh.read(postlen), keep_blank_values=True))
 return args
 
 argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
@@ -97,7 +97,7 @@
 # If httppostargs is used, we need to read Content-Length
 # minus the amount that was consumed by args.
 length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
-for s in util.filechunkiter(self._wsgireq.inp, limit=length):
+for s in util.filechunkiter(self._req.bodyfh, limit=length):
 fp.write(s)
 
 @contextlib.contextmanager
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -61,7 +61,10 @@
 
 @attr.s(frozen=True)
 class parsedrequest(object):
-"""Represents a parsed WSGI request / static HTTP request parameters."""
+"""Represents a parsed WSGI request.
+
+Contains both parsed parameters as well as a handle on the input stream.
+"""
 
 # Request method.
 method = attr.ib()
@@ -91,8 +94,10 @@
 # wsgiref.headers.Headers instance. Operates like a dict with case
 # insensitive keys.
 headers = attr.ib()
+# Request body input stream.
+bodyfh = attr.ib()
 
-def parserequestfromenv(env):
+def parserequestfromenv(env, bodyfh):
 """Parse URL components from environment variables.
 
 WSGI defines request attributes via environment variables. This function
@@ -209,6 +214,12 @@
 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
 headers['Content-Length'] = env['CONTENT_LENGTH']
 
+# TODO do this once we remove wsgirequest.inp, otherwise we could have
+# multiple readers from the underlying input stream.
+#bodyfh = env['wsgi.input']
+#if 'Content-Length' in headers:
+#bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
+
 return parsedrequest(method=env['REQUEST_METHOD'],
  url=fullurl, baseurl=baseurl,
  advertisedurl=advertisedfullurl,
@@ -219,7 +230,8 @@
  querystring=querystring,
  querystringlist=querystringlist,
  querystringdict=querystringdict,
- headers=headers)
+ headers=headers,
+ bodyfh=bodyfh)
 
 class wsgirequest(object):
 """Higher-level API for a WSGI request.
@@ -233,28 +245,27 @@
 if (version < (1, 0)) or (version >= (2, 0)):
 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
% version)
-self.inp = wsgienv[r'wsgi.input']
+
+inp = wsgienv[r'wsgi.input']
 
 if r'HTTP_CONTENT_LENGTH' in wsgienv:
-self.inp = util.cappedreader(self.inp,
- int(wsgienv[r'HTTP_CONTENT_LENGTH']))
+inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
 elif r'CONTENT_LENGTH' in wsgienv:
-self.inp = util.cappedreader(self.inp,
- int(wsgienv[r'CONTENT_LENGTH']))
+inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
 
 self.err = wsgienv[r'wsgi.errors']
 self.threaded = wsgienv[r'wsgi.multithread']
 self.multiprocess = wsgienv[r'wsgi.multiprocess']
 self.run_once = wsgienv[r'wsgi.run_once']
 self.env = wsgienv
-self.form = normalize(cgi.parse(self.inp,
+self.form = normalize(cgi.parse(inp,
 self.env,
 keep_blank_values=1))
 self._start_response = start_response
 self.server_write = None
 self.headers = []
 
-self.req = parserequestfromenv(wsgienv)
+self.req = parserequestfromenv(wsgienv, inp)
 
 def respond(self, status, type, filename=None, body=None):
 if not isinstance(type, str):
@@ -315,7 +326,7 @@
 # input stream doesn't overrun the actual request. So there's
 # no guarantee that 

D2771: hgweb: expose input stream on parsed WSGI request object

2018-03-10 Thread indygreg (Gregory Szorc)
indygreg updated this revision to Diff 6833.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D2771?vs=6815&id=6833

REVISION DETAIL
  https://phab.mercurial-scm.org/D2771

AFFECTED FILES
  mercurial/hgweb/hgwebdir_mod.py
  mercurial/hgweb/request.py
  mercurial/wireprotoserver.py

CHANGE DETAILS

diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -83,7 +83,7 @@
 postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
 if postlen:
 args.update(urlreq.parseqs(
-self._wsgireq.inp.read(postlen), keep_blank_values=True))
+self._req.bodyfh.read(postlen), keep_blank_values=True))
 return args
 
 argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
@@ -97,7 +97,7 @@
 # If httppostargs is used, we need to read Content-Length
 # minus the amount that was consumed by args.
 length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
-for s in util.filechunkiter(self._wsgireq.inp, limit=length):
+for s in util.filechunkiter(self._req.bodyfh, limit=length):
 fp.write(s)
 
 @contextlib.contextmanager
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -61,7 +61,10 @@
 
 @attr.s(frozen=True)
 class parsedrequest(object):
-"""Represents a parsed WSGI request / static HTTP request parameters."""
+"""Represents a parsed WSGI request.
+
+Contains both parsed parameters as well as a handle on the input stream.
+"""
 
 # Request method.
 method = attr.ib()
@@ -91,8 +94,10 @@
 # wsgiref.headers.Headers instance. Operates like a dict with case
 # insensitive keys.
 headers = attr.ib()
+# Request body input stream.
+bodyfh = attr.ib()
 
-def parserequestfromenv(env):
+def parserequestfromenv(env, bodyfh):
 """Parse URL components from environment variables.
 
 WSGI defines request attributes via environment variables. This function
@@ -209,6 +214,12 @@
 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
 headers['Content-Length'] = env['CONTENT_LENGTH']
 
+# TODO do this once we remove wsgirequest.inp, otherwise we could have
+# multiple readers from the underlying input stream.
+#bodyfh = env['wsgi.input']
+#if 'Content-Length' in headers:
+#bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
+
 return parsedrequest(method=env['REQUEST_METHOD'],
  url=fullurl, baseurl=baseurl,
  advertisedurl=advertisedfullurl,
@@ -219,7 +230,8 @@
  querystring=querystring,
  querystringlist=querystringlist,
  querystringdict=querystringdict,
- headers=headers)
+ headers=headers,
+ bodyfh=bodyfh)
 
 class wsgirequest(object):
 """Higher-level API for a WSGI request.
@@ -233,28 +245,27 @@
 if (version < (1, 0)) or (version >= (2, 0)):
 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
% version)
-self.inp = wsgienv[r'wsgi.input']
+
+inp = wsgienv[r'wsgi.input']
 
 if r'HTTP_CONTENT_LENGTH' in wsgienv:
-self.inp = util.cappedreader(self.inp,
- int(wsgienv[r'HTTP_CONTENT_LENGTH']))
+inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
 elif r'CONTENT_LENGTH' in wsgienv:
-self.inp = util.cappedreader(self.inp,
- int(wsgienv[r'CONTENT_LENGTH']))
+inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
 
 self.err = wsgienv[r'wsgi.errors']
 self.threaded = wsgienv[r'wsgi.multithread']
 self.multiprocess = wsgienv[r'wsgi.multiprocess']
 self.run_once = wsgienv[r'wsgi.run_once']
 self.env = wsgienv
-self.form = normalize(cgi.parse(self.inp,
+self.form = normalize(cgi.parse(inp,
 self.env,
 keep_blank_values=1))
 self._start_response = start_response
 self.server_write = None
 self.headers = []
 
-self.req = parserequestfromenv(wsgienv)
+self.req = parserequestfromenv(wsgienv, inp)
 
 def respond(self, status, type, filename=None, body=None):
 if not isinstance(type, str):
@@ -315,7 +326,7 @@
 # input stream doesn't overrun the actual request. So there's
 # no guarantee that reading until EOF won't corrupt the stream
 # state.
-if not isinstance(self.inp, util.cappedreader):
+if not 

D2771: hgweb: expose input stream on parsed WSGI request object

2018-03-09 Thread indygreg (Gregory Szorc)
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Our next step towards moving away from wsgirequest to our newer,
  friendlier parsedrequest type is input stream access.
  
  This commit exposes the input stream on the instance. Consumers
  in the HTTP protocol server switch to it.
  
  Because there were very few consumers of the input stream, we stopped
  storing a reference to the input stream on wsgirequest directly. All
  access now goes through parsedrequest. However, wsgirequest still
  may read from this stream as part of cgi.parse(). So we still need to
  create the stream from wsgirequest.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D2771

AFFECTED FILES
  mercurial/hgweb/hgwebdir_mod.py
  mercurial/hgweb/request.py
  mercurial/wireprotoserver.py

CHANGE DETAILS

diff --git a/mercurial/wireprotoserver.py b/mercurial/wireprotoserver.py
--- a/mercurial/wireprotoserver.py
+++ b/mercurial/wireprotoserver.py
@@ -83,7 +83,7 @@
 postlen = int(self._req.headers.get(b'X-HgArgs-Post', 0))
 if postlen:
 args.update(urlreq.parseqs(
-self._wsgireq.inp.read(postlen), keep_blank_values=True))
+self._req.bodyfh.read(postlen), keep_blank_values=True))
 return args
 
 argvalue = decodevaluefromheaders(self._req, b'X-HgArg')
@@ -97,7 +97,7 @@
 # If httppostargs is used, we need to read Content-Length
 # minus the amount that was consumed by args.
 length -= int(self._req.headers.get(b'X-HgArgs-Post', 0))
-for s in util.filechunkiter(self._wsgireq.inp, limit=length):
+for s in util.filechunkiter(self._req.bodyfh, limit=length):
 fp.write(s)
 
 @contextlib.contextmanager
diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -61,7 +61,10 @@
 
 @attr.s(frozen=True)
 class parsedrequest(object):
-"""Represents a parsed WSGI request / static HTTP request parameters."""
+"""Represents a parsed WSGI request.
+
+Contains both parsed parameters as well as a handle on the input stream.
+"""
 
 # Request method.
 method = attr.ib()
@@ -91,8 +94,10 @@
 # wsgiref.headers.Headers instance. Operates like a dict with case
 # insensitive keys.
 headers = attr.ib()
+# Request body input stream.
+bodyfh = attr.ib()
 
-def parserequestfromenv(env):
+def parserequestfromenv(env, bodyfh):
 """Parse URL components from environment variables.
 
 WSGI defines request attributes via environment variables. This function
@@ -209,6 +214,12 @@
 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
 headers['Content-Length'] = env['CONTENT_LENGTH']
 
+# TODO do this once we remove wsgirequest.inp, otherwise we could have
+# multiple readers from the underlying input stream.
+#bodyfh = env['wsgi.input']
+#if 'Content-Length' in headers:
+#bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
+
 return parsedrequest(method=env['REQUEST_METHOD'],
  url=fullurl, baseurl=baseurl,
  advertisedurl=advertisedfullurl,
@@ -219,7 +230,8 @@
  querystring=querystring,
  querystringlist=querystringlist,
  querystringdict=querystringdict,
- headers=headers)
+ headers=headers,
+ bodyfh=bodyfh)
 
 class wsgirequest(object):
 """Higher-level API for a WSGI request.
@@ -233,28 +245,27 @@
 if (version < (1, 0)) or (version >= (2, 0)):
 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
% version)
-self.inp = wsgienv[r'wsgi.input']
+
+inp = wsgienv[r'wsgi.input']
 
 if r'HTTP_CONTENT_LENGTH' in wsgienv:
-self.inp = util.cappedreader(self.inp,
- int(wsgienv[r'HTTP_CONTENT_LENGTH']))
+inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
 elif r'CONTENT_LENGTH' in wsgienv:
-self.inp = util.cappedreader(self.inp,
- int(wsgienv[r'CONTENT_LENGTH']))
+inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
 
 self.err = wsgienv[r'wsgi.errors']
 self.threaded = wsgienv[r'wsgi.multithread']
 self.multiprocess = wsgienv[r'wsgi.multiprocess']
 self.run_once = wsgienv[r'wsgi.run_once']
 self.env = wsgienv
-self.form = normalize(cgi.parse(self.inp,
+self.form = normalize(cgi.parse(inp,
 self.env,
 keep_blank_values=1))
 self._start_response = start_response