Checkpoint before moving stuff to HTTPCacheAlt.
Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/7b1f37d9 Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/7b1f37d9 Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/7b1f37d9 Branch: refs/heads/ts-974-multi-range-read Commit: 7b1f37d937ced48d4699c91af6363bfa4ab69d80 Parents: ae369ee Author: Alan M. Carroll <solidwallofc...@yahoo-inc.com> Authored: Thu Dec 4 17:11:25 2014 -0600 Committer: Alan M. Carroll <solidwallofc...@yahoo-inc.com> Committed: Sat Dec 6 11:56:04 2014 -0600 ---------------------------------------------------------------------- iocore/cache/CacheHttp.cc | 162 ++++----------------- iocore/cache/CacheRead.cc | 48 ++++--- iocore/cache/I_Cache.h | 11 ++ iocore/cache/I_CacheDefs.h | 158 -------------------- iocore/cache/P_CacheHttp.h | 132 ++++++++++++++++- iocore/cache/P_CacheInternal.h | 77 +--------- lib/ts/InkErrno.h | 2 +- proxy/hdrs/HTTP.cc | 174 ++++++++++++++++++++++ proxy/hdrs/HTTP.h | 280 +++++++++++++++++++++++++++++++++++- proxy/http/HttpTransact.cc | 54 ++++--- 10 files changed, 680 insertions(+), 418 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/iocore/cache/CacheHttp.cc ---------------------------------------------------------------------- diff --git a/iocore/cache/CacheHttp.cc b/iocore/cache/CacheHttp.cc index 3b86a9b..4d82c18 100644 --- a/iocore/cache/CacheHttp.cc +++ b/iocore/cache/CacheHttp.cc @@ -258,157 +258,47 @@ CacheHTTPInfoVector::get_handles(const char *buf, int length, RefCountObj * bloc /*------------------------------------------------------------------------- -------------------------------------------------------------------------*/ -bool -RangeSpec::parse(char const* v, int len) -{ - // Maximum # of digits permitted for an offset. Avoid issues with overflow. - static size_t const MAX_DIGITS = 15; - char const PREFIX[] = { 'b', 'y', 't', 'e', 's', '=' }; - ts::ConstBuffer src(v, len); - size_t n; - - _state = EMPTY; - src.skip(&ParseRules::is_ws); - - if (src.size() > sizeof(PREFIX) && 0 == memcmp(src.data(), PREFIX, sizeof(PREFIX))) { - _state = INVALID; // something, it needs to be correct. - src += sizeof(PREFIX); - while (src) { - ts::ConstBuffer max = src.splitOn(','); - - if (!max) { // no comma so everything in @a src should be processed as a single range. - max = src; - src.reset(); - } - ts::ConstBuffer min = max.splitOn('-'); - - src.skip(&ParseRules::is_ws); - // Spec forbids whitspace anywhere in the range element. - - if (min) { - if (ParseRules::is_digit(*min) && min.size() <= MAX_DIGITS) { - uint64_t low = ats_strto64(min.data(), min.size(), &n); - if (n < min.size()) break; // extra cruft in range, not even ws allowed - if (max) { - if (ParseRules::is_digit(*max) && max.size() <= MAX_DIGITS) { - uint64_t high = ats_strto64(max.data(), max.size(), &n); - if (n < max.size() && (max += n).skip(&ParseRules::is_ws)) - break; // non-ws cruft after maximum - else - this->add(low, high); - } else { - break; // invalid characters for maximum - } - } else { - this->add(low, UINT64_MAX); // "X-" : "offset X to end of content" - } - } else { - break; // invalid characters for minimum - } - } else { - if (max) { - if (ParseRules::is_digit(*max) && max.size() <= MAX_DIGITS) { - uint64_t high = ats_strto64(max.data(), max.size(), &n); - if (n < max.size() && (max += n).skip(&ParseRules::is_ws)) { - break; // cruft after end of maximum - } else { - this->add(high, 0); - } - } else { - break; // invalid maximum - } - } - } +uint64_t +CacheRange::consume(uint64_t size) +{ + switch (_r->_state) { + case HTTPRangeSpec::EMPTY: _offset += size; break; + case HTTPRangeSpec::SINGLE: _offset += std::min(size, (_r->_single._max - _offset) + 1 ); break; + case HTTPRangeSpec::MULTI: + while (size && _idx < static_cast<int>(_r->count())) { + uint64_t r = std::min(size, ((*_r)[_idx]._max - _offset) + 1); + _offset += r; + size -= r; + if (_offset > (*_r)[_idx]._max) + _offset = (*_r)[++_idx]._min; } - if (src) _state = INVALID; // didn't parse everything, must have been an error. + break; + default: break; } - return _state != INVALID; -} -RangeSpec& -RangeSpec::add(uint64_t low, uint64_t high) -{ - if (MULTI == _state) { - _ranges.push_back(Range(low, high)); - } else if (SINGLE == _state) { - _ranges.push_back(_single); - _ranges.push_back(Range(low,high)); - _state = MULTI; - } else { - _single._min = low; - _single._max = high; - _state = SINGLE; - } - return *this; + return _offset; } -bool -RangeSpec::finalize(uint64_t len) +void +CacheRange::generateBoundaryStr(CacheKey const& key) { - if (INVALID == _state || EMPTY == _state) { - // nothing but simplifying later logic. - } else if (0 == len) { - /* Must special case zero length content - - suffix ranges are OK but other ranges are not. - - SM must return a 200 (not 206 or 416) for a valid range on zero length content. - (this is what Apache HTTPD does and seems the least bad thing) - - Therefore we don't bother actually adjusting the ranges as values don't matter. - */ - if (!_single.isSuffix()) _state = INVALID; - if (MULTI == _state) { - for ( RangeBox::iterator spot = _ranges.begin(), limit = _ranges.end() ; spot != limit && MULTI == _state ; ++spot ) { - if (!spot->isSuffix()) _state = INVALID; - } - } - } else { // len > 0 - if (!_single.finalize(len)) _state = INVALID; - if (MULTI == _state) { - for ( RangeBox::iterator spot = _ranges.begin(), limit = _ranges.end() ; spot != limit && MULTI == _state; ++spot ) { - if (!spot->finalize(len)) _state = INVALID; - } - } - } - return INVALID != _state; + snprintf(_boundary, sizeof(_boundary), "%08" PRIu64 "%08" PRIu64 "..%08" PRIu64 + , key.slice64(0), key.slice64(1), this_ethread()->generator.random() + ); } -/*------------------------------------------------------------------------- - -------------------------------------------------------------------------*/ bool -CacheRange::finalize(uint64_t len) +CacheRange::setContentType(HTTPHdr* header) { - bool zret = super::finalize(len); - if (zret) { - if (this->isEmpty()) { // pretend it's one range [0..len) - _offset = 0; - } else { - _idx = 0; - _offset = _single._min; - } - _len = len; - } - return zret; + _ct_field = header->field_find(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE); + return NULL != _ct_field; } uint64_t -CacheRange::consume(uint64_t size) +CacheRange::calcContentLength() const { - switch (_state) { - case EMPTY: _offset += size; break; - case SINGLE: _offset += std::min(size, (_single._max - _offset) + 1 ); break; - case MULTI: - while (size && _idx < static_cast<int>(_ranges.size())) { - uint64_t r = std::min(size, (_ranges[_idx]._max - _offset) + 1); - _offset += r; - size -= r; - if (_offset > _ranges[_idx]._max) - _offset = _ranges[++_idx]._min; - } - break; - default: break; - } - - return _offset; + return _r->calcContentLength(_len, _ct_field->m_len_value); } /*------------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/iocore/cache/CacheRead.cc ---------------------------------------------------------------------- diff --git a/iocore/cache/CacheRead.cc b/iocore/cache/CacheRead.cc index f263d82..ac5e13e 100644 --- a/iocore/cache/CacheRead.cc +++ b/iocore/cache/CacheRead.cc @@ -109,8 +109,6 @@ Cache::open_read(Continuation * cont, CacheKey * key, CacheHTTPHdr * request, { CACHE_TRY_LOCK(lock, vol->mutex, mutex->thread_holding); if (!lock.is_locked() || (od = vol->open_read(key)) || dir_probe(key, vol, &result, &last_collision)) { - MIMEField* range_field = request->field_find(MIME_FIELD_RANGE, MIME_LEN_RANGE); - c = new_CacheVC(cont); c->first_key = c->key = c->earliest_key = *key; c->vol = vol; @@ -121,12 +119,6 @@ Cache::open_read(Continuation * cont, CacheKey * key, CacheHTTPHdr * request, c->frag_type = CACHE_FRAG_TYPE_HTTP; c->params = params; c->od = od; - if (range_field) { - char const* value; - int len; - value = range_field->value_get(&len); - c->req_rs.parse(value, len); - } } if (!lock.is_locked()) { SET_CONTINUATION_HANDLER(c, &CacheVC::openReadStartHead); @@ -180,6 +172,18 @@ CacheVC::load_http_info(CacheHTTPInfoVector* info, Doc* doc, RefCountObj * block return zret; } +char const* +CacheVC::get_http_range_boundary_string(int* len) const +{ + return resp_range.getBoundaryStr(len); +} + +uint64_t +CacheVC::get_http_content_size() +{ + return resp_range.calcContentLength(); +} + int CacheVC::openReadFromWriterFailure(int event, Event * e) { @@ -636,9 +640,7 @@ Lcallreturn: LreadMain: ++fragment; doc_pos = doc->prefix_len(); - if (req_rs.isValid()) { - doc_pos += req_rs.getOffset() - frag_upper_bound; // used before update! - } + doc_pos += resp_range.getOffset() - frag_upper_bound; // used before update! frag_upper_bound += doc->data_len(); next_CacheKey(&key, &key); SET_HANDLER(&CacheVC::openReadMain); @@ -692,9 +694,9 @@ CacheVC::openReadMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) int64_t bytes = doc->len - doc_pos; IOBufferBlock *b = NULL; #ifdef HTTP_CACHE - if (req_rs.isValid()) { + if (resp_range.isActive()) { int target = -1; // target fragment index. - uint64_t target_offset = req_rs.getOffset(); + uint64_t target_offset = resp_range.getOffset(); uint64_t lower_bound = frag_upper_bound - doc->data_len(); if (target_offset < lower_bound || frag_upper_bound <= target_offset) { @@ -703,7 +705,7 @@ CacheVC::openReadMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) if (is_debug_tag_set("amc")) { char b[33], c[33]; Debug("amc", "Seek @ %" PRIu64 " [r#=%d] in %s from #%d @ %" PRIu64 "/%d/%" PRId64 ":%s%s", - target_offset, req_rs.getIdx(), first_key.toHexStr(b), fragment, frag_upper_bound, doc->len, doc->total_len, doc->key.toHexStr(c) + target_offset, resp_range.getIdx(), first_key.toHexStr(b), fragment, frag_upper_bound, doc->len, doc->total_len, doc->key.toHexStr(c) , (frags ? "" : "no frag table") ); } @@ -725,7 +727,7 @@ CacheVC::openReadMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) doc_pos = r_doc_pos; bytes = doc->len - doc_pos; } - bytes = std::min(bytes, static_cast<int64_t>(req_rs.getRemnantSize())); + bytes = std::min(bytes, static_cast<int64_t>(resp_range.getRemnantSize())); } #endif } @@ -743,7 +745,7 @@ CacheVC::openReadMain(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */) vio.buffer.writer()->append_block(b); vio.ndone += bytes; doc_pos += bytes; - req_rs.consume(bytes); + resp_range.consume(bytes); if (vio.ntodo() <= 0) return calluser(VC_EVENT_READ_COMPLETE); else { @@ -1150,6 +1152,15 @@ CacheVC::openReadStartHead(int event, Event * e) alternate.copy_shallow(alternate_tmp); alternate.object_key_get(&key); doc_len = alternate.object_size_get(); + resp_range.setRangeSpec(&(alternate.response_get()->getRangeSpec())); + if (!resp_range.apply(request.getRangeSpec(), doc_len)) { + err = ECACHE_UNSATISFIABLE_RANGE; + goto Ldone; + } + resp_range.setContentType(alternate.response_get()); + if (resp_range.isMulti()) + resp_range.generateBoundaryStr(earliest_key); + if (key == doc->key) { // is this my data? f.single_fragment = doc->single_fragment(); ink_assert(f.single_fragment); // otherwise need to read earliest @@ -1172,11 +1183,6 @@ CacheVC::openReadStartHead(int event, Event * e) doc_len = doc->total_len; } - if (!req_rs.finalize(doc_len)) { - err = ECACHE_BAD_REQUEST_RANGE; - goto Ldone; - } - if (is_debug_tag_set("cache_read")) { // amc debug char xt[33],yt[33]; Debug("cache_read", "CacheReadStartHead - read %s target %s - %s %d of %" PRId64" bytes, %d fragments", http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/iocore/cache/I_Cache.h ---------------------------------------------------------------------- diff --git a/iocore/cache/I_Cache.h b/iocore/cache/I_Cache.h index 1606a23..ffdf1ba 100644 --- a/iocore/cache/I_Cache.h +++ b/iocore/cache/I_Cache.h @@ -220,6 +220,17 @@ struct CacheVConnection:public VConnection #ifdef HTTP_CACHE virtual void set_http_info(CacheHTTPInfo *info) = 0; virtual void get_http_info(CacheHTTPInfo **info) = 0; + + /** Get the boundary string for a multi-part range response. + The length of the string is returned in @a len. + + @return A point to the string. + */ + virtual char const* get_http_range_boundary_string(int* len) const = 0; + /** Get the effective content size. + This is the actual content sized modified (if appropriate) by range data. + */ + virtual uint64_t get_http_content_size() = 0; #endif virtual bool is_ram_cache_hit() const = 0; http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/iocore/cache/I_CacheDefs.h ---------------------------------------------------------------------- diff --git a/iocore/cache/I_CacheDefs.h b/iocore/cache/I_CacheDefs.h index ce50958..6d8392c 100644 --- a/iocore/cache/I_CacheDefs.h +++ b/iocore/cache/I_CacheDefs.h @@ -132,162 +132,4 @@ typedef CryptoHash CacheKey; word(3) - ram cache hash, lookaside cache */ - -/** A range specification. - - This represents the data for an HTTP range specification. -*/ -struct RangeSpec { - typedef RangeSpec self; - - /** A range of bytes in an object. - - If @a _min > 0 and @a _max == 0 the range is backwards and counts from the - end of the object. That is (100,0) means the last 100 bytes of content. - */ - struct Range { - uint64_t _min; - uint64_t _max; - - /// Default constructor - invalid range. - Range() : _min(UINT64_MAX), _max(1) { } - /// Construct as the range ( @a low .. @a high ) - Range(uint64_t low, uint64_t high) : _min(low), _max(high) {} - - /// Test if this range is a trailing (terminal) range. - bool isSuffix() const; - /// Test if this range is a valid range. - bool isValid() const; - /// Adjust the range values based on content size @a len. - bool finalize(uint64_t len); - /// Force the range to an invalid state. - Range& invalidate(); - }; - - /// Current state of the overall specification. - /// @internal We can distinguish between @c SINGLE and @c MULTI by looking at the - /// size of @a _ranges but we need this to mark @c EMPTY vs. not. - enum State { - EMPTY, ///< No range. - INVALID, ///< Range parsing failed. - SINGLE, ///< Single range. - MULTI, ///< Multiple ranges. - } _state; - - /// The first range value. - /// By separating this out we can avoid allocation in the case of a single - /// range value, which is by far the most common ( > 99% in my experience). - Range _single; - /// Storage for range values. - typedef std::vector<Range> RangeBox; - /// The first range is copied here if there is more than one (to simplify). - RangeBox _ranges; - - /// Default constructor - invalid range - RangeSpec(); - - /** Parse a range field and update @a this with the results. - @return @c true if @a v was a valid range specifier, @c false otherwise. - */ - bool parse(char const* v, int len); - - /** Validate and convert for a specific content @a length. - - @return @c true if the range is satisfiable per the HTTP spec, @c false otherwise. - Note a range spec with no ranges is always satisfiable. - */ - bool finalize(uint64_t length); - - /** Number of distinct ranges. - @return Number of ranges. - */ - size_t count() const; - - /// If this is a valid single range specification. - bool isSingle() const; - - /// If this is a valid multi range specification. - bool isMulti() const; - - /// Test if this contains at least one valid range. - bool isValid() const; - - /// Test if this is a valid but empty range spec. - bool isEmpty() const; - -protected: - self& add(uint64_t low, uint64_t high); -}; - -inline -RangeSpec::RangeSpec() : _state(EMPTY) -{ -} - -inline bool -RangeSpec::isSingle() const -{ - return SINGLE == _state; -} - -inline bool -RangeSpec::isMulti() const -{ - return MULTI == _state; -} - -inline bool -RangeSpec::isEmpty() const -{ - return EMPTY == _state; -} - -inline size_t -RangeSpec::count() const -{ - return SINGLE == _state ? 1 : _ranges.size(); -} - -inline bool -RangeSpec::isValid() const -{ - return SINGLE == _state || MULTI == _state; -} - -inline RangeSpec::Range& -RangeSpec::Range::invalidate() -{ - _min = UINT64_MAX; - _max = 1; - return *this; -} - -inline bool -RangeSpec::Range::isSuffix() const -{ - return 0 == _max && _min > 0; -} - -inline bool -RangeSpec::Range::isValid() const -{ - return _min <= _max || this->isSuffix(); -} - -inline bool -RangeSpec::Range::finalize(uint64_t len) -{ - ink_assert(len > 0); - bool zret = true; // is this range satisfiable for @a len? - if (this->isSuffix()) { - _max = len - 1; - _min = _min > len ? 0 : len - _min; - } else if (_min < len) { - _max = MIN(_max,len); - } else { - this->invalidate(); - zret = false; - } - return zret; -} #endif // __CACHE_DEFS_H__ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/iocore/cache/P_CacheHttp.h ---------------------------------------------------------------------- diff --git a/iocore/cache/P_CacheHttp.h b/iocore/cache/P_CacheHttp.h index e64a200..f76e842 100644 --- a/iocore/cache/P_CacheHttp.h +++ b/iocore/cache/P_CacheHttp.h @@ -34,7 +34,6 @@ typedef URL CacheURL; typedef HTTPHdr CacheHTTPHdr; typedef HTTPInfo CacheHTTPInfo; - #define OFFSET_BITS 24 enum { @@ -88,6 +87,80 @@ struct CacheHTTPInfoVector Ptr<RefCountObj> vector_buf; }; +/** Range operation tracking. + + This holds a range specification. It also tracks the current object offset and the individual range. + + For simplification of the logic that uses this class it will pretend to be a single range of + the object size if it is empty. To return the correct response we still need to distinuish + those two cases. +*/ +class CacheRange +{ + public: + typedef CacheRange self; ///< Self reference type. + + /// Default constructor + CacheRange() : _offset(0), _idx(-1), _r(NULL), _ct_field(NULL) { } + + /// Set the internal range spec pointer to @a src. + self& setRangeSpec(HTTPRangeSpec* src); + + /// Test if the range should be active (used). + /// @internal This means it has ranges and should be used to do seeks on the content. + bool isActive() const; + + /// Test for multiple ranges. + bool isMulti() const; + + /// Get the current object offset + uint64_t getOffset() const; + + /// Get the current range index. + int getIdx() const; + + /// Get the remaining contiguous bytes for the current range. + uint64_t getRemnantSize() const; + + /** Advance @a size bytes in the range spec. + + @return The resulting offset in the object. + */ + uint64_t consume(uint64_t size); + + /** Apply a @a src range and content @a len to the contained range spec. + + @return @c true if successfully applied, @c false otherwise. + */ + bool apply(HTTPRangeSpec const& src, uint64_t len); + + /** Get the range boundary string. + @a len if not @c NULL receives the length of the string. + */ + char const* getBoundaryStr(int* len) const; + + /** Generate the range boundary string */ + void generateBoundaryStr(CacheKey const& key); + + /** Stash the Content-Type field pointer from a @a header. + + @return @c true if a Content-Type field was found in @a header, @c false if not. + */ + bool setContentType(HTTPHdr* header); + + /** Calculate the effective HTTP content length value. + */ + uint64_t calcContentLength() const; + + protected: + uint64_t _len; ///< Total object length. + uint64_t _offset; ///< Offset in content. + int _idx; ///< Current range index. (< 0 means not in a range) + HTTPRangeSpec* _r; ///< The actual ranges. + MIMEField* _ct_field; ///< Content-Type field. + char _boundary[HTTP_RANGE_BOUNDARY_LEN]; +}; + TS_INLINE CacheHTTPInfo * CacheHTTPInfoVector::get(int idx) { @@ -96,4 +169,61 @@ CacheHTTPInfoVector::get(int idx) return &data[idx].alternate; } +inline CacheRange& +CacheRange::setRangeSpec(HTTPRangeSpec* src) +{ + _r = src; + return *this; +} + +inline bool +CacheRange::apply(HTTPRangeSpec const& src, uint64_t len) +{ + return _r && _r->apply(src, len); +} + +inline bool +CacheRange::isActive() const +{ + return _r && (_r->isSingle() || _r->isMulti()); +} + +inline uint64_t +CacheRange::getOffset() const +{ + return _offset; +} + +inline int +CacheRange::getIdx() const +{ + return _idx; +} + +inline uint64_t +CacheRange::getRemnantSize() const +{ + uint64_t zret = 0; + + if (!_r || _r->isEmpty()) + zret = _len - _offset; + else if (_r->isValid() && 0 <= _idx && _idx < static_cast<int>(_r->count())) + zret = ((*_r)[_idx]._max - _offset) + 1; + + return zret; +} + +inline char const* +CacheRange::getBoundaryStr(int* len) const +{ + if (len) *len = sizeof(_boundary); + return _boundary; +} + +inline bool +CacheRange::isMulti() const +{ + return _r && _r->isMulti(); +} + #endif /* __CACHE_HTTP_H__ */ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/iocore/cache/P_CacheInternal.h ---------------------------------------------------------------------- diff --git a/iocore/cache/P_CacheInternal.h b/iocore/cache/P_CacheInternal.h index 2de18a3..e386c98 100644 --- a/iocore/cache/P_CacheInternal.h +++ b/iocore/cache/P_CacheInternal.h @@ -233,46 +233,6 @@ extern int cache_config_mutex_retry_delay; extern int good_interim_disks; #endif -/** Range operation tracking. - - This holds a range specification. It also tracks the current object offset and the individual range. - - For simplification of the logic that uses this class it will pretend to be a single range of - the object size if it is empty. To return the correct response we still need to distinuish - those two cases. -*/ -class CacheRange : public RangeSpec -{ - public: - typedef CacheRange self; ///< Self reference type. - typedef RangeSpec super; ///< Parent type. - - /// Default constructor - CacheRange() : super(), _offset(0), _idx(-1) { } - - /// Convert to specific values based on content @a length. - bool finalize(uint64_t length); - - /// Get the current object offset - uint64_t getOffset() const; - - /// Get the current range index. - int getIdx() const; - - /// Get the remaining contiguous bytes for the current range. - uint64_t getRemnantSize() const; - - /** Advance @a size bytes in the range spec. - - @return The resulting offset in the object. - */ - uint64_t consume(uint64_t size); - - protected: - uint64_t _len; ///< Total object length. - uint64_t _offset; ///< Offset in content. - int _idx; ///< Current range index. (< 0 means not in a range) -}; // CacheVC struct CacheVC: public CacheVConnection @@ -414,7 +374,11 @@ struct CacheVC: public CacheVConnection /// The table of @a frags and its @a count must be provided. int frag_idx_for_offset(HTTPInfo::FragOffset* frags, int count, uint64_t offset); + virtual char const* get_http_range_boundary_string(int* len) const; + virtual uint64_t get_http_content_size(); + #endif + virtual bool is_pread_capable(); virtual bool set_pin_in_cache(time_t time_pin); virtual time_t get_pin_in_cache(); @@ -503,6 +467,7 @@ struct CacheVC: public CacheVConnection uint64_t total_len; // total length written and available to write uint64_t doc_len; // total_length (of the selected alternate for HTTP) uint64_t update_len; + CacheRange resp_range; ///< Tracking information for range data for response. /// The offset in the content of the first byte beyond the end of the current fragment. /// @internal This seems very weird but I couldn't figure out how to keep the more sensible /// lower bound correctly updated. @@ -516,12 +481,7 @@ struct CacheVC: public CacheVConnection int header_to_write_len; void *header_to_write; short writer_lock_retry; - /* Range specs for range based operations. - @a req_rng is the range spec in the request from the User Agent. - @a rsp_rng is the range spec sent to the origin server. - */ - CacheRange req_rs; - CacheRange rsp_rs; + #if TS_USE_INTERIM_CACHE == 1 InterimCacheVol *interim_vol; MigrateToInterimCache *mts; @@ -688,8 +648,6 @@ free_CacheVC(CacheVC *cont) cont->alternate_index = CACHE_ALT_INDEX_DEFAULT; if (cont->scan_vol_map) ats_free(cont->scan_vol_map); - cont->req_rs.~CacheRange(); - cont->rsp_rs.~CacheRange(); memset((char *) &cont->vio, 0, cont->size_to_init); #ifdef CACHE_STAT_PAGES ink_assert(!cont->stat_link.next && !cont->stat_link.prev); @@ -1419,29 +1377,6 @@ local_cache() return theCache; } -inline uint64_t -CacheRange::getOffset() const -{ - return _offset; -} - -inline int -CacheRange::getIdx() const -{ - return _idx; -} - -inline uint64_t -CacheRange::getRemnantSize() const -{ - uint64_t zret = 0; - if (this->isEmpty()) zret = _len - _offset; - else if (this->isSingle()) zret = (_single._max - _offset) + 1; - else if (this->isMulti() && 0 <= _idx && _idx < static_cast<int>(_ranges.size())) - zret = (_ranges[_idx]._max - _offset) + 1; - return zret; -} - LINK_DEFINITION(CacheVC, opendir_link) #endif /* _P_CACHE_INTERNAL_H__ */ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/lib/ts/InkErrno.h ---------------------------------------------------------------------- diff --git a/lib/ts/InkErrno.h b/lib/ts/InkErrno.h index bb9bfb1..45a846d 100644 --- a/lib/ts/InkErrno.h +++ b/lib/ts/InkErrno.h @@ -66,7 +66,7 @@ #define ECACHE_NOT_READY (CACHE_ERRNO+7) #define ECACHE_ALT_MISS (CACHE_ERRNO+8) #define ECACHE_BAD_READ_REQUEST (CACHE_ERRNO+9) -#define ECACHE_BAD_REQUEST_RANGE (CACHE_ERRNO+10) +#define ECACHE_UNSATISFIABLE_RANGE (CACHE_ERRNO+10) #define EHTTP_ERROR (HTTP_ERRNO+0) http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/proxy/hdrs/HTTP.cc ---------------------------------------------------------------------- diff --git a/proxy/hdrs/HTTP.cc b/proxy/hdrs/HTTP.cc index ce28ba7..06bebaa 100644 --- a/proxy/hdrs/HTTP.cc +++ b/proxy/hdrs/HTTP.cc @@ -2132,3 +2132,177 @@ HTTPInfo::push_frag_offset(FragOffset offset) { m_alt->m_frag_offsets[m_alt->m_frag_offset_count++] = offset; } + +/*********************************************************************** + * * + * R A N G E S U P P O R T * + * * + ***********************************************************************/ + +bool +HTTPRangeSpec::parse(char const* v, int len) +{ + // Maximum # of digits permitted for an offset. Avoid issues with overflow. + static size_t const MAX_DIGITS = 15; + ts::ConstBuffer src(v, len); + size_t n; + + _state = EMPTY; + src.skip(&ParseRules::is_ws); + + if (src.size() > sizeof(HTTP_LEN_BYTES)+1 && + 0 == memcmp(src.data(), HTTP_VALUE_BYTES, HTTP_LEN_BYTES) && '=' == src[HTTP_LEN_BYTES] + ) { + _state = INVALID; // something, it needs to be correct. + src += HTTP_LEN_BYTES+1; + while (src) { + ts::ConstBuffer max = src.splitOn(','); + + if (!max) { // no comma so everything in @a src should be processed as a single range. + max = src; + src.reset(); + } + + ts::ConstBuffer min = max.splitOn('-'); + + src.skip(&ParseRules::is_ws); + // Spec forbids whitspace anywhere in the range element. + + if (min) { + if (ParseRules::is_digit(*min) && min.size() <= MAX_DIGITS) { + uint64_t low = ats_strto64(min.data(), min.size(), &n); + if (n < min.size()) break; // extra cruft in range, not even ws allowed + if (max) { + if (ParseRules::is_digit(*max) && max.size() <= MAX_DIGITS) { + uint64_t high = ats_strto64(max.data(), max.size(), &n); + if (n < max.size() && (max += n).skip(&ParseRules::is_ws)) + break; // non-ws cruft after maximum + else + this->add(low, high); + } else { + break; // invalid characters for maximum + } + } else { + this->add(low, UINT64_MAX); // "X-" : "offset X to end of content" + } + } else { + break; // invalid characters for minimum + } + } else { + if (max) { + if (ParseRules::is_digit(*max) && max.size() <= MAX_DIGITS) { + uint64_t high = ats_strto64(max.data(), max.size(), &n); + if (n < max.size() && (max += n).skip(&ParseRules::is_ws)) { + break; // cruft after end of maximum + } else { + this->add(high, 0); + } + } else { + break; // invalid maximum + } + } + } + } + if (src) _state = INVALID; // didn't parse everything, must have been an error. + } + return _state != INVALID; +} + +HTTPRangeSpec& +HTTPRangeSpec::add(uint64_t low, uint64_t high) +{ + if (MULTI == _state) { + _ranges.push_back(Range(low, high)); + } else if (SINGLE == _state) { + _ranges.push_back(_single); + _ranges.push_back(Range(low,high)); + _state = MULTI; + } else { + _single._min = low; + _single._max = high; + _state = SINGLE; + } + return *this; +} + +bool +HTTPRangeSpec::apply(self const& that, uint64_t len) +{ + _state = that._state; + if (INVALID == _state || EMPTY == _state) { + // nothing - simplifying later logic. + } else if (0 == len) { + /* Must special case zero length content + - suffix ranges are OK but other ranges are not. + - Best option is to return a 200 (not 206 or 416) for all suffix range spec on zero length content. + (this is what Apache HTTPD does) + - So, mark result as either @c UNSATISFIABLE or @c EMPTY, don't bother copying any ranges. + */ + _state = EMPTY; + if (!that._single.isSuffix()) _state = UNSATISFIABLE; + for ( RangeBox::const_iterator spot = that._ranges.begin(), limit = that._ranges.end() ; spot != limit && EMPTY == _state ; ++spot ) { + if (!spot->isSuffix()) _state = UNSATISFIABLE; + } + } else if (that.isSingle()) { + _single = that._single; + if (!_single.apply(len)) _state = UNSATISFIABLE; + } else { // gotta be MULTI + _ranges.reserve(that._ranges.size()); + for ( RangeBox::const_iterator spot = that._ranges.begin(), limit = that._ranges.end() ; spot != limit ; ++spot ) { + Range r(*spot); + if (r.apply(len)) _ranges.push_back(r); + } + if (_ranges.size() > 0) { + _single = _ranges[0]; + if (_ranges.size() == 1) _state = SINGLE; + } else { + _state = UNSATISFIABLE; + } + } + return this->isValid(); +} + +HTTPRangeSpec& +HTTPHdr::getRangeSpec() +{ + if (!m_range_parsed && HTTP_TYPE_REQUEST == m_http->m_polarity) { + MIMEField* f = this->field_find(MIME_FIELD_RANGE, MIME_LEN_RANGE); + if (f) { + int len; + char const* value = f->value_get(&len); + if (value) { + m_range_spec.parse(value,len); + } + } + } + m_range_parsed = true; + return m_range_spec; +} + +int +Calc_Digital_Length(uint64_t x) +{ + char buff[32]; // big enough for 64 bit # + return snprintf(buff, sizeof(buff), "%" PRIu64, x); +} + +uint64_t +HTTPRangeSpec::calcContentLength(uint64_t object_size, uint64_t ct_len) const +{ + uint64_t size = object_size; + size_t nr = this->count(); + + if (nr >= 1) { + size = this->size(); + if (nr > 1) { + size_t l_size = Calc_Digital_Length(object_size); + // CR LF "--" boundary-string CR LF "Content-Range" ": " "bytes " X "-" Y "/" Z + uint64_t sep_size = 2 + 2 + HTTP_RANGE_BOUNDARY_LEN + 2 + MIME_LEN_CONTENT_RANGE + 2 + HTTP_LEN_BYTES + 1 + l_size + 1 +l_size + 1 + l_size + 2; + + if (ctf) sep_size += MIME_LEN_CONTENT_TYPE + 2 + ct_len + 2; + size += nr * sep_size; + } + } + return size; +} + http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/proxy/hdrs/HTTP.h ---------------------------------------------------------------------- diff --git a/proxy/hdrs/HTTP.h b/proxy/hdrs/HTTP.h index f9175e5..cbe0316 100644 --- a/proxy/hdrs/HTTP.h +++ b/proxy/hdrs/HTTP.h @@ -25,6 +25,7 @@ #define __HTTP_H__ #include <assert.h> +#include <vector> #include "Arena.h" #include "INK_MD5.h" #include "MIME.h" @@ -429,6 +430,8 @@ extern int HTTP_LEN_S_MAXAGE; extern int HTTP_LEN_NEED_REVALIDATE_ONCE; extern int HTTP_LEN_100_CONTINUE; +static size_t const HTTP_RANGE_BOUNDARY_LEN = 32 + 2 + 16; + /* Private */ void http_hdr_adjust(HTTPHdrImpl *hdrp, int32_t offset, int32_t length, int32_t delta); @@ -510,6 +513,138 @@ public: int32_t m_version; }; +/** A set of content ranges. + + This represents the data for an HTTP range specification. + On a request this contains the request ranges. On a response it is the actual ranges in the + response, which are the requested ranges modified by the actual content length. +*/ +struct HTTPRangeSpec { + typedef HTTPRangeSpec self; + + /** A range of bytes in an object. + + If @a _min > 0 and @a _max == 0 the range is backwards and counts from the + end of the object. That is (100,0) means the last 100 bytes of content. + */ + struct Range { + uint64_t _min; + uint64_t _max; + + /// Default constructor - invalid range. + Range() : _min(UINT64_MAX), _max(1) { } + /// Construct as the range ( @a low .. @a high ) + Range(uint64_t low, uint64_t high) : _min(low), _max(high) {} + + /// Test if this range is a suffix range. + bool isSuffix() const; + /// Test if this range is a valid range. + bool isValid() const; + /// Get the size (in bytes) of the range. + uint64_t size() const; + /** Convert range to absolute values for a content length of @a len. + + @return @c true if the range was valid for @a len, @c false otherwise. + */ + bool apply(uint64_t len); + + /// Force the range to an empty state. + Range& invalidate(); + }; + + /// Range iteration type. + typedef Range* iterator; + + /// Current state of the overall specification. + /// @internal We can distinguish between @c SINGLE and @c MULTI by looking at the + /// size of @a _ranges but we need this to mark @c EMPTY vs. not. + enum State { + EMPTY, ///< No range. + INVALID, ///< Range parsing failed. + UNSATISFIABLE, ///< Content length application failed. + SINGLE, ///< Single range. + MULTI, ///< Multiple ranges. + } _state; + + /// The first range value. + /// By separating this out we can avoid allocation in the case of a single + /// range value, which is by far the most common ( > 99% in my experience). + Range _single; + /// Storage for range values. + typedef std::vector<Range> RangeBox; + /// The first range is copied here if there is more than one (to simplify). + RangeBox _ranges; + + /// Default constructor - invalid range + HTTPRangeSpec(); + + /** Parse a range field @a value and update @a this with the results. + @return @c true if @a value was a valid range specifier, @c false otherwise. + */ + bool parse(char const* value, int len); + + /** Copy ranges from @a while applying them to the content @a length. + + Ranges are copied if valid for @a length and converted to absolute offsets. The number of ranges + after application may be less than the @a src number of ranges. In addition ranges will be clipped + to @a length. + + @return @c true if the range spec is satisfiable, @c false otherwise. + Note a range spec with no ranges is always satisfiable and that suffix ranges are also + always satisfiable. + */ + bool apply(self const& that, uint64_t length); + + /** Number of distinct ranges. + @return Number of ranges. + */ + size_t count() const; + + /// Get the size (in bytes) of the ranges. + uint64_t size() const; + + /// If this is a valid single range specification. + bool isSingle() const; + + /// If this is a valid multi range specification. + bool isMulti() const; + + /// Test if this contains at least one valid range. + bool hasRanges() const; + + /// Test if this is a well formed range (may be empty). + bool isValid() const; + + /// Test if this is a valid but empty range spec. + bool isEmpty() const; + + /// Test if this is an unsatisfied range. + bool isUnsatisfied() const; + + /// Access the range at index @a idx. + Range& operator [] (int n); + + /** Calculate the content length for this range specification. + + @note If a specific content length has not been @c apply 'd this will not produce + a usable result. + + @return The content length for the ranges including the range separators. + */ + uint64_t calcContentLength( + uint64_t base_content_size, ///< Content size w/o ranges. + uint64_t ct_len ///< Length of Content-Type field value. + ) const; + + /// Iterator for first range. + iterator begin(); + /// Iterator past last range. + iterator end(); + +protected: + self& add(uint64_t low, uint64_t high); +}; + class IOBufferReader; class HTTPHdr: public MIMEHdr @@ -529,6 +664,15 @@ public: /// also had a port, @c false otherwise. mutable bool m_port_in_header; + /// Parsed data from the RANGE field. + /// For requests, this is the RANGE field specification. + /// For responses, this is the RANGE field applied to the content length. + HTTPRangeSpec m_range_spec; + /// This is the content type, detached from the response header, if needed. + MIMEField* m_range_content_type; + /// Have we parsed the range field yet? + bool m_range_parsed; + HTTPHdr(); ~HTTPHdr(); @@ -651,6 +795,12 @@ public: const char *reason_get(int *length); void reason_set(const char *value, int length); + /// Get the internal @c HTTPRangeSpec instance. + HTTPRangeSpec& getRangeSpec(); + /// Locate and parse (if present) the @c Range header field. + /// The results are put in to the internal @c HTTPRangeSpec instance. + bool parse_range(); + MIMEParseResult parse_req(HTTPParser *parser, const char **start, const char *end, bool eof); MIMEParseResult parse_resp(HTTPParser *parser, const char **start, const char *end, bool eof); @@ -799,7 +949,7 @@ HTTPVersion::operator <=(const HTTPVersion & hv) const inline HTTPHdr::HTTPHdr() - : MIMEHdr(), m_http(NULL), m_url_cached(), m_target_cached(false) + : MIMEHdr(), m_http(NULL), m_url_cached(), m_target_cached(false), m_range_content_type(NULL), m_range_parsed(false) { } @@ -1523,4 +1673,132 @@ HTTPInfo::get_frag_offset_count() { return m_alt ? m_alt->m_frag_offset_count : 0; } +inline +HTTPRangeSpec::HTTPRangeSpec() : _state(EMPTY) +{ +} + +inline bool +HTTPRangeSpec::isSingle() const +{ + return SINGLE == _state; +} + +inline bool +HTTPRangeSpec::isMulti() const +{ + return MULTI == _state; +} + +inline bool +HTTPRangeSpec::isEmpty() const +{ + return EMPTY == _state; +} + +inline bool +HTTPRangeSpec::isUnsatisfied() const +{ + return UNSATISFIABLE == _state; +} + +inline size_t +HTTPRangeSpec::count() const +{ + return SINGLE == _state ? 1 : _ranges.size(); +} + +inline bool +HTTPRangeSpec::hasRanges() const +{ + return SINGLE == _state || MULTI == _state; +} + +inline bool +HTTPRangeSpec::isValid() const +{ + return SINGLE == _state || MULTI == _state || EMPTY == _state; +} + +inline HTTPRangeSpec::Range& +HTTPRangeSpec::Range::invalidate() +{ + _min = UINT64_MAX; + _max = 1; + return *this; +} + +inline bool +HTTPRangeSpec::Range::isSuffix() const +{ + return 0 == _max && _min > 0; +} + +inline bool +HTTPRangeSpec::Range::isValid() const +{ + return _min <= _max || this->isSuffix(); +} + +inline uint64_t +HTTPRangeSpec::Range::size() const +{ + return 1 + (_max - _min); +} + +inline uint64_t +HTTPRangeSpec::size() const +{ + uint64_t size = 0; + if (this->isSingle()) size = _single.size(); + else if (this->isMulti()) { + for ( RangeBox::const_iterator spot = _ranges.begin(), limit = _ranges.end() ; spot != limit ; ++spot) + size += spot->size(); + } + return size; +} + +inline bool +HTTPRangeSpec::Range::apply(uint64_t len) +{ + ink_assert(len > 0); + bool zret = true; // is this range satisfiable for @a len? + if (this->isSuffix()) { + _max = len - 1; + _min = _min > len ? 0 : len - _min; + } else if (_min < len) { + _max = MIN(_max,len); + } else { + this->invalidate(); + zret = false; + } + return zret; +} + +inline HTTPRangeSpec::Range& +HTTPRangeSpec::operator [] (int n) +{ + return SINGLE == _state ? _single : _ranges[n]; +} + +inline HTTPRangeSpec::iterator +HTTPRangeSpec::begin() +{ + switch (_state) { + case SINGLE: return &_single; + case MULTI: return &(*(_ranges.begin())); + default: return NULL; + } +} + +inline HTTPRangeSpec::iterator +HTTPRangeSpec::end() +{ + switch (_state) { + case SINGLE: return (&_single)+1; + case MULTI: return &(*(_ranges.end())); + default: return NULL; + } +} + #endif /* __HTTP_H__ */ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/7b1f37d9/proxy/http/HttpTransact.cc ---------------------------------------------------------------------- diff --git a/proxy/http/HttpTransact.cc b/proxy/http/HttpTransact.cc index c348c29a..b1f51c3 100644 --- a/proxy/http/HttpTransact.cc +++ b/proxy/http/HttpTransact.cc @@ -47,8 +47,7 @@ #include "HttpClientSession.h" #include "I_Machine.h" -static char range_type[] = "multipart/byteranges; boundary=RANGE_SEPARATOR"; -#define RANGE_NUMBERS_LENGTH 60 +static char const HTTP_RANGE_MULTIPART_CONTENT_TYPE[] = "multipart/byteranges; boundary="; #define HTTP_INCREMENT_TRANS_STAT(X) update_stat(s, X, 1); #define HTTP_SUM_TRANS_STAT(X,S) update_stat(s, X, (ink_statval_t) S); @@ -6581,14 +6580,13 @@ HttpTransact::handle_content_length_header(State* s, HTTPHdr* header, HTTPHdr* b case SOURCE_HTTP_ORIGIN_SERVER: // We made our decision about whether to trust the // response content length in init_state_vars_from_response() - if (s->range_setup != HttpTransact::RANGE_NOT_TRANSFORM_REQUESTED) - break; + break; case SOURCE_CACHE: // if we are doing a single Range: request, calculate the new // C-L: header - if (s->range_setup == HttpTransact::RANGE_NOT_TRANSFORM_REQUESTED) { - change_response_header_because_of_range_request(s,header); + if (base->hasRanges()) + change_response_header_because_of_range_request(s,header,base); s->hdr_info.trust_response_cl = true; } //////////////////////////////////////////////// @@ -8760,8 +8758,10 @@ HttpTransact::delete_warning_value(HTTPHdr* to_warn, HTTPWarningCode warning_cod void HttpTransact::change_response_header_because_of_range_request(State *s, HTTPHdr * header) { - MIMEField *field; + MIMEField *field = header->field_find(MILE_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE); char *reason_phrase; + HTTPHdr* cached_response = cache_sm.cache_read_vc->get_response(); + HTTPRangeSpec& rs = cached_response->getRangeSpec(); Debug("http_trans", "Partial content requested, re-calculating content-length"); @@ -8770,35 +8770,31 @@ HttpTransact::change_response_header_because_of_range_request(State *s, HTTPHdr header->reason_set(reason_phrase, strlen(reason_phrase)); // set the right Content-Type for multiple entry Range - if (s->num_range_fields > 1) { - field = header->field_find(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE); + if (rs.isMulti()) { // means we need a boundary string. + char buff[(sizeof(HTTP_MULTIPART_RANGE_CONTENT_TYPE)-1) + HTTP_RANGE_BOUNDARY_LEN]; if (field != NULL) header->field_delete(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE); field = header->field_create(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE); - field->value_append(header->m_heap, header->m_mime, range_type, sizeof(range_type) - 1); + snprintf(buff, sizeof(buff), "%s%.*s", HTTP_MULTIPART_RANGE_CONTENT_TYPE, HTTP_RANGE_BOUNDARY_LEN, cache_sm.cache_read_vc->getBoundaryStr()); + field->value_append(header->m_heap, header->m_mime, buff, sizeof(buff)); header->field_attach(field); - // TODO: There's a known bug here where the Content-Length is not correct for multi-part - // Range: requests. - header->set_content_length(s->range_output_cl); - } else { - if (s->cache_info.object_read && s->cache_info.object_read->valid()) { - // TODO: It's unclear under which conditions we need to update the Content-Range: header, - // many times it's already set correctly before calling this. For now, always try do it - // when we have the information for it available. - // TODO: Also, it's unclear as to why object_read->valid() is not always true here. - char numbers[RANGE_NUMBERS_LENGTH]; - header->field_delete(MIME_FIELD_CONTENT_RANGE, MIME_LEN_CONTENT_RANGE); - field = header->field_create(MIME_FIELD_CONTENT_RANGE, MIME_LEN_CONTENT_RANGE); - snprintf(numbers, sizeof(numbers), "bytes %" PRId64"-%" PRId64"/%" PRId64, s->ranges[0]._start, s->ranges[0]._end, - s->cache_info.object_read->object_size_get()); - field->value_set(header->m_heap, header->m_mime, numbers, strlen(numbers)); - header->field_attach(field); - } - // Always update the Content-Length: header. - header->set_content_length(s->range_output_cl); + header->set_content_length(cached_response()->calcContentLength()); + } else if (rs.isSingle()) { + int n; + char numbers[RANGE_NUMBERS_LENGTH]; + header->field_delete(MIME_FIELD_CONTENT_RANGE, MIME_LEN_CONTENT_RANGE); + field = header->field_create(MIME_FIELD_CONTENT_RANGE, MIME_LEN_CONTENT_RANGE); + n = snprintf( numbers, sizeof(numbers), "%s %" PRIu64"-%" PRIu64"/%" PRId64 + , HTTP_VALUE_BYTES + , (*rs)[0]._min, (*rs)[0]._max + , cached_response->get_content_length() + ); + field->value_set(header->m_heap, header->m_mime, numbers, n); + header->field_attach(field); + header->set_content_length(cached_response()->calcContentLength()); } }