[tahoe-dev] darcs patch: Improve HTTP/1.1 byterange handling

Jeremy Fitzhardinge Tue, 09 Mar 2010 19:15:48 -0800

Tue Mar  9 18:59:13 PST 2010  Jeremy Fitzhardinge <[email protected]>
  * Improve HTTP/1.1 byterange handling
  
  Fix parsing of a Range: header to support:
   - multiple ranges (parsed, but not returned)
   - suffix byte ranges ("-2139")
   - correct handling of incorrectly formatted range headers
     (correct behaviour is to ignore the header and return the full
      file)
   - return appropriate error for ranges outside the file
  
  Multiple ranges are parsed, but only the first range is returned.
  Returning multiple ranges requires using the multipart/byterange
  content type.

New patches:


[Improve HTTP/1.1 byterange handling
Jeremy Fitzhardinge <[email protected]>**20100310025913
 Ignore-this: 6d69e694973d618f0dc65983735cd9be
 
 Fix parsing of a Range: header to support:
  - multiple ranges (parsed, but not returned)
  - suffix byte ranges ("-2139")
  - correct handling of incorrectly formatted range headers
    (correct behaviour is to ignore the header and return the full
     file)
  - return appropriate error for ranges outside the file
 
 Multiple ranges are parsed, but only the first range is returned.
 Returning multiple ranges requires using the multipart/byterange
 content type.
 
] {
hunk ./src/allmydata/test/test_web.py 582
         d.addCallback(_got)
         return d
 
+    def test_GET_FILEURL_partial_end_range(self):
+        headers = {"range": "bytes=-5"}
+        length  = len(self.BAR_CONTENTS)
+        d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
+                     return_response=True)
+        def _got((res, status, headers)):
+            self.failUnlessEqual(int(status), 206)
+            self.failUnless(headers.has_key("content-range"))
+            self.failUnlessEqual(headers["content-range"][0],
+                                 "bytes %d-%d/%d" % (length-5, length-1, length))
+            self.failUnlessEqual(res, self.BAR_CONTENTS[-5:])
+        d.addCallback(_got)
+        return d
+
+    def test_GET_FILEURL_partial_range_overrun(self):
+        headers = {"range": "bytes=100-200"}
+        length  = len(self.BAR_CONTENTS)
+        d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_overrun",
+                             "416 Requested Range not satisfiable",
+                             "First beyond end of file",
+                             self.GET, self.public_url + "/foo/bar.txt",
+                             headers=headers)
+        return d
+
     def test_HEAD_FILEURL_range(self):
         headers = {"range": "bytes=1-10"}
         d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
hunk ./src/allmydata/test/test_web.py 632
         d.addCallback(_got)
         return d
 
+    def test_HEAD_FILEURL_partial_end_range(self):
+        headers = {"range": "bytes=-5"}
+        length  = len(self.BAR_CONTENTS)
+        d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
+                     return_response=True)
+        def _got((res, status, headers)):
+            self.failUnlessEqual(int(status), 206)
+            self.failUnless(headers.has_key("content-range"))
+            self.failUnlessEqual(headers["content-range"][0],
+                                 "bytes %d-%d/%d" % (length-5, length-1, length))
+        d.addCallback(_got)
+        return d
+
+    def test_HEAD_FILEURL_partial_range_overrun(self):
+        headers = {"range": "bytes=100-200"}
+        length  = len(self.BAR_CONTENTS)
+        d = self.shouldFail2(error.Error, "test_HEAD_FILEURL_range_overrun",
+                             "416 Requested Range not satisfiable",
+                             "",
+                             self.HEAD, self.public_url + "/foo/bar.txt",
+                             headers=headers)
+        return d
+
     def test_GET_FILEURL_range_bad(self):
         headers = {"range": "BOGUS=fizbop-quarnak"}
hunk ./src/allmydata/test/test_web.py 657
-        d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_bad",
-                             "400 Bad Request",
-                             "Syntactically invalid http range header",
-                             self.GET, self.public_url + "/foo/bar.txt",
-                             headers=headers)
+        d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
+                     return_response=True)
+        def _got((res, status, headers)):
+            self.failUnlessEqual(int(status), 200)
+            self.failUnless(not headers.has_key("content-range"))
+            self.failUnlessEqual(res, self.BAR_CONTENTS)
+        d.addCallback(_got)
         return d
 
     def test_HEAD_FILEURL(self):
hunk ./src/allmydata/web/filenode.py 338
         self.filenode = filenode
         self.filename = filename
 
+    def parse_range_header(self, range):
+        # Parse a byte ranges according to RFC 2616 "14.35.1 Byte
+        # Ranges".  Returns None if the range doesn't make sense so it
+        # can be ignored (per the spec).  When successful, returns a
+        # list of (first,last) inclusive range tuples.
+
+        filesize = self.filenode.get_size()
+        assert isinstance(filesize, (int,long)), filesize
+
+        try:
+            # byte-ranges-specifier
+            units, rangeset = range.split('=', 1)
+            if units != 'bytes':
+                return None     # nothing else supported
+
+            def parse_range(r):
+                first, last = r.split('-', 1)
+
+                if first is '':
+                    # suffix-byte-range-spec
+                    first = filesize - long(last)
+                    last = filesize - 1
+                else:
+                    # byte-range-spec
+
+                    # first-byte-pos
+                    first = long(first)
+
+                    # last-byte-pos
+                    if last is '':
+                        last = filesize - 1
+                    else:
+                        last = long(last)
+
+                if last < first:
+                    raise ValueError
+
+                return (first, last)
+
+            # byte-range-set
+            #
+            # Note: the spec uses "1#" for the list of ranges, which
+            # implicitly allows whitespace around the ',' separators,
+            # so strip it.
+            return [ parse_range(r.strip()) for r in rangeset.split(',') ]
+        except ValueError:
+            return None
+
     def renderHTTP(self, ctx):
         req = IRequest(ctx)
         gte = static.getTypeAndEncoding
hunk ./src/allmydata/web/filenode.py 407
 
         filesize = self.filenode.get_size()
         assert isinstance(filesize, (int,long)), filesize
-        offset, size = 0, None
+        first, size = 0, None
         contentsize = filesize
         req.setHeader("accept-ranges", "bytes")
         if not self.filenode.is_mutable():
hunk ./src/allmydata/web/filenode.py 421
         # or maybe just use the URI for CHK and LIT.
         rangeheader = req.getHeader('range')
         if rangeheader:
-            # adapted from nevow.static.File
-            bytesrange = rangeheader.split('=')
-            if bytesrange[0] != 'bytes':
-                raise WebError("Syntactically invalid http range header!")
-            start, end = bytesrange[1].split('-')
-            if start:
-                offset = int(start)
-                if not end:
-                    # RFC 2616 says:
-                    #
-                    # "If the last-byte-pos value is absent, or if the value is
-                    # greater than or equal to the current length of the
-                    # entity-body, last-byte-pos is taken to be equal to one less
-                    # than the current length of the entity- body in bytes."
-                    end = filesize - 1
-                size = int(end) - offset + 1
-            req.setResponseCode(http.PARTIAL_CONTENT)
-            req.setHeader('content-range',"bytes %s-%s/%s" %
-                          (str(offset), str(offset+size-1), str(filesize)))
-            contentsize = size
+            ranges = self.parse_range_header(rangeheader)
+
+            # ranges = None means the header didn't parse, so ignore
+            # the header as if it didn't exist.  If is more than one
+            # range, then just return the first for now, until we can
+            # generate multipart/byteranges.
+            if ranges is not None:
+                first, last = ranges[0]
+
+                if first >= filesize:
+                    raise WebError('First beyond end of file',
+                                   http.REQUESTED_RANGE_NOT_SATISFIABLE)
+                else:
+                    first = max(0, first)
+                    last = min(filesize-1, last)
+
+                    req.setResponseCode(http.PARTIAL_CONTENT)
+                    req.setHeader('content-range',"bytes %s-%s/%s" %
+                                  (str(first), str(last),
+                                   str(filesize)))
+                    contentsize = last - first + 1
+                    size = contentsize
+
         req.setHeader("content-length", str(contentsize))
         if req.method == "HEAD":
             return ""
hunk ./src/allmydata/web/filenode.py 447
-        d = self.filenode.read(req, offset, size)
+        d = self.filenode.read(req, first, size)
         def _error(f):
             if req.startedWriting:
                 # The content-type is already set, and the response code has
}

Context:

[setup: add licensing declaration for setuptools (noticed by the FSF compliance folks)
[email protected]**20100309184415
 Ignore-this: 2dfa7d812d65fec7c72ddbf0de609ccb
] 
[setup: fix error in licensing declaration from Shawn Willden, as noted by the FSF compliance division
[email protected]**20100309163736
 Ignore-this: c0623d27e469799d86cabf67921a13f8
] 
[CREDITS to Jacob Appelbaum
[email protected]**20100304015616
 Ignore-this: 70db493abbc23968fcc8db93f386ea54
] 
[desert-island-build-with-proper-versions
[email protected]**20100304013858] 
[docs: a few small edits to try to guide newcomers through the docs
[email protected]**20100303231902
 Ignore-this: a6aab44f5bf5ad97ea73e6976bc4042d
 These edits were suggested by my watching over Jake Appelbaum's shoulder as he completely ignored/skipped/missed install.html and also as he decided that debian.txt wouldn't help him with basic installation. Then I threw in a few docs edits that have been sitting around in my sandbox asking to be committed for months.
] 
[TAG allmydata-tahoe-1.6.1
[email protected]**20100228062314
 Ignore-this: eb5f03ada8ea953ee7780e7fe068539
] 
Patch bundle hash:
71df1d7c5d480486075c91c692f9569f9789c29a

_______________________________________________
tahoe-dev mailing list
[email protected]
http://allmydata.org/cgi-bin/mailman/listinfo/tahoe-dev

[tahoe-dev] darcs patch: Improve HTTP/1.1 byterange handling

Reply via email to