rob05c commented on a change in pull request #2262: Part 1 of Range Request handling URL: https://github.com/apache/incubator-trafficcontrol/pull/2262#discussion_r188122792
########## File path: grove/plugin/range_req_handler.go ########## @@ -0,0 +1,225 @@ +package plugin + +/* + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +import ( + "crypto/rand" + "encoding/hex" + "encoding/json" + "fmt" + "net/http" + "strconv" + "strings" + + "github.com/apache/incubator-trafficcontrol/grove/web" + "github.com/apache/incubator-trafficcontrol/lib/go-log" +) + +type byteRange struct { + Start int64 + End int64 +} + +type rangeRequestConfig struct { + Mode string `json::"mode"` +} + +func init() { + AddPlugin(10000, Funcs{ + load: rangeReqHandleLoad, + onRequest: rangeReqHandlerOnRequest, + beforeCacheLookUp: rangeReqHandleBeforeCacheLookup, + beforeParentRequest: rangeReqHandleBeforeParent, + beforeRespond: rangeReqHandleBeforeRespond, + }) +} + +// rangeReqHandleLoad loads the configuration +func rangeReqHandleLoad(b json.RawMessage) interface{} { + cfg := rangeRequestConfig{} + log.Errorf("rangeReqHandleLoad loading: %s", b) + + err := json.Unmarshal(b, &cfg) + if err != nil { + log.Errorln("range_rew_handler loading config, unmarshalling JSON: " + err.Error()) + return nil + } + if !(cfg.Mode == "get_full_serve_range" || cfg.Mode == "patch") { + log.Errorf("Unknown mode for range_req_handler plugin: %s", cfg.Mode) + } + log.Debugf("range_rew_handler: load success: %+v\n", cfg) + return &cfg +} + +// rangeReqHandlerOnRequest determines if there is a Range header, and puts the ranges in *d.Context as a []byteRanges +func rangeReqHandlerOnRequest(icfg 
interface{}, d OnRequestData) bool { + rHeader := d.R.Header.Get("Range") + if rHeader == "" { + log.Debugf("No Range header found") + return false + } + log.Debugf("Range string is: %s", rHeader) + // put the ranges [] in the context so we can use it later + byteRanges := parseRangeHeader(rHeader) + *d.Context = byteRanges + return false +} + +func rangeReqHandleBeforeCacheLookup(icfg interface{}, d BeforeCacheLookUpData, overRideFunc func(string)) { + cfg, ok := icfg.(*rangeRequestConfig) + if !ok { + log.Errorf("range_req_handler config '%v' type '%T' expected *rangeRequestConfig\n", icfg, icfg) + return + } + if cfg.Mode == "store_ranges" { + sep := "?" + if strings.Contains(d.DefaultCacheKey, "?") { + sep = "&" + } + newKey := d.DefaultCacheKey + sep + "grove_range_req_handler_plugin_data=" + d.Req.Header.Get("Range") + overRideFunc(newKey) + log.Debugf("range_req_handler: store_ranges default key:%s, new key:%s", d.DefaultCacheKey, newKey) + } +} + +// rangeReqHandleBeforeParent changes the parent request if needed (mode == get_full_serve_range) +func rangeReqHandleBeforeParent(icfg interface{}, d BeforeParentRequestData) { + log.Debugf("rangeReqHandleBeforeParent calling.") + rHeader := d.Req.Header.Get("Range") + if rHeader == "" { + log.Debugf("No Range header found") + return + } + log.Debugf("Range string is: %s", rHeader) + cfg, ok := icfg.(*rangeRequestConfig) + if !ok { + log.Errorf("range_req_handler config '%v' type '%T' expected *rangeRequestConfig\n", icfg, icfg) + return + } + if cfg.Mode == "get_full_serve_range" { + // get_full_serve_range means get the whole thing from parent/org, but serve the requested range. Just remove the Range header from the upstream request + d.Req.Header.Del("Range") + } + return +} + +// rangeReqHandleBeforeRespond builds the 206 response +// Assume all the needed ranges have been put in cache before, which is the truth for "get_full_serve_range" mode which gets the whole object into cache. 
+func rangeReqHandleBeforeRespond(icfg interface{}, d BeforeRespondData) { + log.Debugf("rangeReqHandleBeforeRespond calling\n") + ictx := d.Context + ctx, ok := (*ictx).([]byteRange) + if !ok { + log.Errorf("Invalid context: %v", ictx) + } + if len(ctx) == 0 { + return // there was no (valid) range header + } + + cfg, ok := icfg.(*rangeRequestConfig) + if !ok { + log.Errorf("range_req_handler config '%v' type '%T' expected *rangeRequestConfig\n", icfg, icfg) + return + } + if cfg.Mode == "store_ranges" { + return // no need to do anything here. + } + + multipartBoundaryString := "" + originalContentType := d.Hdr.Get("Content-type") + *d.Hdr = web.CopyHeader(*d.Hdr) // copy the headers, we don't want to mod the cacheObj + if len(ctx) > 1 { + //multipart = true + multipartBoundaryBytes := make([]byte, 8) + if _, err := rand.Read(multipartBoundaryBytes); err != nil { + log.Errorf("Error with rand.Read: %v", err) + } + multipartBoundaryString = hex.EncodeToString(multipartBoundaryBytes) + d.Hdr.Set("Content-Type", fmt.Sprintf("multipart/byteranges; boundary=%s", multipartBoundaryString)) + } + totalContentLength, err := strconv.ParseInt(d.Hdr.Get("Content-Length"), 10, 64) + if err != nil { + log.Errorf("Invalid Content-Length header: %v", d.Hdr.Get("Content-Length")) + } + body := make([]byte, 0) + for _, thisRange := range ctx { + if thisRange.End == -1 || thisRange.End >= totalContentLength { // if the end range is "", or too large serve until the end + thisRange.End = totalContentLength - 1 + } + log.Debugf("range:%d-%d", thisRange.Start, thisRange.End) + if multipartBoundaryString != "" { + body = append(body, []byte(fmt.Sprintf("\r\n--%s\r\n", multipartBoundaryString))...) Review comment: Golang Printf functions are incredibly slow. I know the code is uglier, but I'd recommend using string concatenation and `strconv.Itoa`, possibly even consider `bytes.Buffer`. 
This is in the request path, so it'll likely directly impact the number of requests Grove can handle concurrently. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
