// Source: VictoriaMetrics/vendor/github.com/klauspost/compress/gzhttp/compress.go

package gzhttp
import (
"bufio"
"fmt"
"io"
"mime"
"net"
"net/http"
"strconv"
"strings"
"sync"
"github.com/klauspost/compress/gzhttp/writer"
"github.com/klauspost/compress/gzhttp/writer/gzkp"
"github.com/klauspost/compress/gzip"
)
const (
// HeaderNoCompression can be used to disable compression.
// Any header value will disable compression.
// The Header is always removed from output.
HeaderNoCompression = "No-Gzip-Compression"
// Names of the standard HTTP headers that this file reads or modifies.
vary = "Vary"
acceptEncoding = "Accept-Encoding"
contentEncoding = "Content-Encoding"
contentRange = "Content-Range"
acceptRanges = "Accept-Ranges"
contentType = "Content-Type"
contentLength = "Content-Length"
eTag = "ETag"
)
// codings maps a content-coding name to its qvalue, as produced by parseEncodings.
type codings map[string]float64
const (
// DefaultQValue is the default qvalue to assign to an encoding if no explicit qvalue is set.
// This is actually kind of ambiguous in RFC 2616, so hopefully it's correct.
// The examples seem to indicate that it is.
DefaultQValue = 1.0
// DefaultMinSize is the default minimum response size, in bytes, at which gzip compression is enabled.
// 1500 bytes is the MTU size for the internet since that is the largest size allowed at the network layer.
// If you take a file that is 1300 bytes and compress it to 800 bytes, it's still transmitted in that same 1500 byte packet regardless, so you've gained nothing.
// That being the case, you should restrict the gzip compression to files with a size (plus header) greater than a single packet;
// 1024 bytes (1KB) is therefore the default.
DefaultMinSize = 1024
)
// GzipResponseWriter provides an http.ResponseWriter interface, which gzips
// bytes before writing them to the underlying response. This doesn't close the
// writers, so don't forget to do that.
// It can be configured to skip responses smaller than minSize.
//
// The first bytes written are held in buf until Write, Flush or Close decides
// whether to compress (startGzip) or to pass the data through (startPlain).
type GzipResponseWriter struct {
http.ResponseWriter
level int // Compression level handed to gwFactory.New.
gwFactory writer.GzipWriterFactory // Creates gw when compression starts.
gw writer.GzipWriter // Active gzip writer; nil until compression has started and after Close.
code int // Saves the WriteHeader value; reset to 0 once it has been forwarded.
minSize int // Specifies the minimum response size to gzip. A response of at least this many bytes is eligible for compression.
buf []byte // Holds the first part of the write before reaching the minSize or the end of the write.
ignore bool // If true, then we immediately passthru writes to the underlying ResponseWriter.
keepAcceptRanges bool // Keep "Accept-Ranges" header.
setContentType bool // Add content type, if missing and detected.
suffixETag string // Suffix to add to ETag header if response is compressed.
dropETag bool // Drop ETag header if response is compressed (supersedes suffixETag).
contentTypeFilter func(ct string) bool // Reports whether a response with content type ct may be compressed.
}
// GzipResponseWriterWithCloseNotify is a GzipResponseWriter that additionally
// exposes CloseNotify. NewWrapper hands it to the handler when the underlying
// ResponseWriter implements http.CloseNotifier.
type GzipResponseWriterWithCloseNotify struct {
*GzipResponseWriter
}
// CloseNotify forwards to the CloseNotify method of the wrapped
// ResponseWriter. The type assertion is unchecked, so this panics if the
// wrapped writer does not implement http.CloseNotifier.
func (w GzipResponseWriterWithCloseNotify) CloseNotify() <-chan bool {
	notifier := w.ResponseWriter.(http.CloseNotifier)
	return notifier.CloseNotify()
}
// Write buffers, compresses or passes through b, depending on the state of w.
//
// Once the gzip writer exists (w.gw != nil) every call is forwarded to it, and
// once w.ignore is set every call goes straight to the underlying
// ResponseWriter; in both cases the result of that Write is returned as is.
// Before either decision has been made, up to max(512, w.minSize) bytes are
// accumulated in w.buf and the response headers are inspected to choose
// between startGzip and startPlain.
//
// On the deciding paths it returns len(b) and a nil error on success (also
// when the data was only buffered), or 0 and the error if starting either
// mode or writing the unbuffered remainder fails.
func (w *GzipResponseWriter) Write(b []byte) (int, error) {
// GZIP responseWriter is initialized. Use the GZIP responseWriter.
if w.gw != nil {
return w.gw.Write(b)
}
// If we have already decided not to use GZIP, immediately passthrough.
if w.ignore {
return w.ResponseWriter.Write(b)
}
// Save the write into a buffer for later use in GZIP responseWriter
// (if content is long enough) or at close with regular responseWriter.
// The buffer is capped at max(512, minSize) bytes.
wantBuf := 512
if w.minSize > wantBuf {
wantBuf = w.minSize
}
toAdd := len(b)
if len(w.buf)+toAdd > wantBuf {
toAdd = wantBuf - len(w.buf)
}
w.buf = append(w.buf, b[:toAdd]...)
// remain is the part of b that did not fit into the buffer; it is written
// directly once a mode has been chosen.
remain := b[toAdd:]
hdr := w.Header()
// Only continue if they didn't already choose an encoding or a known unhandled content length or type.
if len(hdr[HeaderNoCompression]) == 0 && hdr.Get(contentEncoding) == "" && hdr.Get(contentRange) == "" {
// Check more expensive parts now.
cl, _ := atoi(hdr.Get(contentLength))
ct := hdr.Get(contentType)
// && binds tighter than ||: a Content-Length that is absent or parses to 0
// always enters this branch; the content type filter is applied again below
// before compression actually starts.
if cl == 0 || cl >= w.minSize && (ct == "" || w.contentTypeFilter(ct)) {
// If the current buffer is less than minSize and a Content-Length isn't set, then wait until we have more data.
if len(w.buf) < w.minSize && cl == 0 {
return len(b), nil
}
// If the Content-Length is larger than minSize or the current buffer is larger than minSize, then continue.
if cl >= w.minSize || len(w.buf) >= w.minSize {
// If a Content-Type wasn't specified, infer it from the current buffer.
if ct == "" {
ct = http.DetectContentType(w.buf)
}
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
// Set the header only if the key does not exist
if _, ok := hdr[contentType]; w.setContentType && !ok {
hdr.Set(contentType, ct)
}
// If the Content-Type is acceptable to GZIP, initialize the GZIP writer.
if w.contentTypeFilter(ct) {
if err := w.startGzip(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.gw.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}
}
}
}
// If we got here, we should not GZIP this response.
if err := w.startPlain(); err != nil {
return 0, err
}
if len(remain) > 0 {
if _, err := w.ResponseWriter.Write(remain); err != nil {
return 0, err
}
}
return len(b), nil
}
// startGzip switches the response into compressed mode: it rewrites the
// response headers for a gzip body, forwards any saved status code and, if
// bytes are buffered, creates the gzip writer and feeds it the buffer.
//
// It returns the error of the buffered write, or io.ErrShortWrite when that
// write accepted fewer bytes than were buffered without reporting an error.
func (w *GzipResponseWriter) startGzip() error {
	hdr := w.Header()

	// Announce the encoding.
	hdr.Set(contentEncoding, "gzip")

	// A Content-Length set by the handler describes the uncompressed body and
	// would keep the correct one from being set on write.
	// See: https://github.com/golang/go/issues/14975.
	hdr.Del(contentLength)

	if !w.keepAcceptRanges {
		hdr.Del(acceptRanges)
	}

	// dropETag supersedes suffixETag.
	switch {
	case w.dropETag:
		hdr.Del(eTag)
	case w.suffixETag != "":
		if tag := hdr.Get(eTag); tag != "" {
			// Place the suffix before the closing quote, or at the very end
			// when the tag contains no quote.
			at := strings.LastIndex(tag, `"`)
			if at == -1 {
				at = len(tag)
			}
			hdr.Set(eTag, tag[:at]+w.suffixETag+tag[at:])
		}
	}

	// Forward the saved status code and clear it so that it is sent only once.
	if w.code != 0 {
		w.ResponseWriter.WriteHeader(w.code)
		w.code = 0
	}

	// With nothing buffered the gzip writer is not created yet: closing it
	// would emit a gzip header even though nothing was ever written.
	if len(w.buf) == 0 {
		return nil
	}

	w.init()
	written, err := w.gw.Write(w.buf)
	// Per the io.Writer contract this should not happen, but a silent short
	// write leaves the stream in an unknown state, so report it.
	if err == nil && written < len(w.buf) {
		err = io.ErrShortWrite
	}
	w.buf = w.buf[:0]
	return err
}
// startPlain switches the response into pass-through mode: it removes the
// HeaderNoCompression header, forwards any saved status code, marks the writer
// as ignoring compression and writes the buffered bytes, if any, to the
// underlying ResponseWriter without gzip.
//
// It returns the error of that write, or io.ErrShortWrite when the write
// accepted fewer bytes than were buffered without reporting an error.
func (w *GzipResponseWriter) startPlain() error {
	w.Header().Del(HeaderNoCompression)

	// Forward the saved status code and clear it so that it is sent only once.
	if w.code != 0 {
		w.ResponseWriter.WriteHeader(w.code)
		w.code = 0
	}
	w.ignore = true

	pending := w.buf
	if len(pending) > 0 {
		written, err := w.ResponseWriter.Write(pending)
		// Per the io.Writer contract this should not happen, but a silent
		// short write leaves the stream in an unknown state, so report it.
		if err == nil && written < len(pending) {
			err = io.ErrShortWrite
		}
		w.buf = pending[:0]
		return err
	}

	// Write was never called, so don't call Write on the underlying
	// ResponseWriter either.
	return nil
}
// WriteHeader records the status code so that it can be forwarded later by
// startGzip or startPlain. Only the first non-zero code is kept; calls made
// while a code is already saved are ignored.
func (w *GzipResponseWriter) WriteHeader(code int) {
	if w.code != 0 {
		return
	}
	w.code = code
}
// init asks the configured factory for a gzip writer that targets the
// underlying ResponseWriter at the configured level and stores it in w.gw.
// From then on Write redirects all bytes through it. It does not modify any
// headers.
func (w *GzipResponseWriter) init() {
	dst := w.ResponseWriter
	w.gw = w.gwFactory.New(dst, w.level)
}
// Close finishes the response.
//
// In pass-through mode it does nothing. If compression has started it closes
// the gzip writer, drops the reference to it and returns the close error. If
// no decision was made yet, the buffered bytes are written out uncompressed
// via startPlain and any error from that is returned with added context.
func (w *GzipResponseWriter) Close() error {
	switch {
	case w.ignore:
		return nil
	case w.gw != nil:
		err := w.gw.Close()
		w.gw = nil
		return err
	}

	// GZIP was never triggered: write out the regular response.
	if err := w.startPlain(); err != nil {
		return fmt.Errorf("gziphandler: write to regular responseWriter at close gets error: %q", err.Error())
	}
	return nil
}
// Flush flushes the underlying *gzip.Writer and then the underlying
// http.ResponseWriter if it is an http.Flusher. This makes GzipResponseWriter
// an http.Flusher.
//
// If nothing has been written yet, nothing will be flushed.
//
// If data is buffered but no mode has been chosen yet, Flush forces the
// decision: a Content-Length that is absent or parses to 0 is treated as
// minSize, so the response is compressed when the headers and the content type
// filter allow it, and passed through otherwise.
func (w *GzipResponseWriter) Flush() {
if w.gw == nil && !w.ignore {
if len(w.buf) == 0 {
// Nothing written yet.
return
}
var (
cl, _ = atoi(w.Header().Get(contentLength))
ct = w.Header().Get(contentType)
ce = w.Header().Get(contentEncoding)
cr = w.Header().Get(contentRange)
)
if ct == "" {
ct = http.DetectContentType(w.buf)
// Handles the intended case of setting a nil Content-Type (as for http/server or http/fs)
// Set the header only if the key does not exist
if _, ok := w.Header()[contentType]; w.setContentType && !ok {
w.Header().Set(contentType, ct)
}
}
if cl == 0 {
// Assume minSize.
cl = w.minSize
}
// See if we should compress...
// Flush has no error result, so errors from startGzip and startPlain are dropped here.
if len(w.Header()[HeaderNoCompression]) == 0 && ce == "" && cr == "" && cl >= w.minSize && w.contentTypeFilter(ct) {
w.startGzip()
} else {
w.startPlain()
}
}
// Push out anything the gzip writer is still holding, then flush the connection.
if w.gw != nil {
w.gw.Flush()
}
if fw, ok := w.ResponseWriter.(http.Flusher); ok {
fw.Flush()
}
}
// Hijack implements http.Hijacker by delegating to the underlying
// ResponseWriter. When that writer is not an http.Hijacker, it returns nil
// values and an error.
func (w *GzipResponseWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) {
	hj, ok := w.ResponseWriter.(http.Hijacker)
	if !ok {
		return nil, nil, fmt.Errorf("http.Hijacker interface is not supported")
	}
	return hj.Hijack()
}
// verify Hijacker interface implementation
var _ http.Hijacker = &GzipResponseWriter{}
// onceDefault guards the one-time construction of defaultWrapper in GzipHandler.
var onceDefault sync.Once
// defaultWrapper is the wrapper created by NewWrapper without options; it is assigned by GzipHandler.
var defaultWrapper func(http.Handler) http.HandlerFunc
// GzipHandler allows to easily wrap an http handler with default settings.
func GzipHandler(h http.Handler) http.HandlerFunc {
onceDefault.Do(func() {
var err error
defaultWrapper, err = NewWrapper()
if err != nil {
panic(err)
}
})
return defaultWrapper(h)
}
// grwPool recycles GzipResponseWriter values between requests; NewWrapper takes one per
// compressed request and puts it back when the handler returns.
var grwPool = sync.Pool{New: func() interface{} { return &GzipResponseWriter{} }}
// NewWrapper returns a reusable wrapper with the supplied options.
//
// The options are applied on top of the defaults (default compression level,
// DefaultMinSize, the gzkp writer, DefaultContentTypeFilter and content type
// detection enabled) and the result is validated; a validation failure is
// returned as the error and no wrapper is returned.
//
// The returned function turns a handler into one that always adds
// "Vary: Accept-Encoding" and, when the request accepts gzip, serves it through
// a pooled GzipResponseWriter. Otherwise the handler runs against a writer
// that only strips HeaderNoCompression.
func NewWrapper(opts ...option) (func(http.Handler) http.HandlerFunc, error) {
c := &config{
level: gzip.DefaultCompression,
minSize: DefaultMinSize,
writer: writer.GzipWriterFactory{
Levels: gzkp.Levels,
New: gzkp.NewWriter,
},
contentTypes: DefaultContentTypeFilter,
setContentType: true,
}
for _, o := range opts {
o(c)
}
if err := c.validate(); err != nil {
return nil, err
}
return func(h http.Handler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
w.Header().Add(vary, acceptEncoding)
if acceptsGzip(r) {
gw := grwPool.Get().(*GzipResponseWriter)
// Reset every field of the pooled writer; only the old buffer is carried
// over (gw.buf is read before the assignment takes effect) so that its
// capacity can be reused.
*gw = GzipResponseWriter{
ResponseWriter: w,
gwFactory: c.writer,
level: c.level,
minSize: c.minSize,
contentTypeFilter: c.contentTypes,
keepAcceptRanges: c.keepAcceptRanges,
dropETag: c.dropETag,
suffixETag: c.suffixETag,
buf: gw.buf,
setContentType: c.setContentType,
}
if len(gw.buf) > 0 {
gw.buf = gw.buf[:0]
}
// Finish the response and return the writer to the pool once the handler is done.
// The error from Close is not reported.
defer func() {
gw.Close()
gw.ResponseWriter = nil
grwPool.Put(gw)
}()
// Only expose CloseNotify when the underlying writer supports it.
if _, ok := w.(http.CloseNotifier); ok {
gwcn := GzipResponseWriterWithCloseNotify{gw}
h.ServeHTTP(gwcn, r)
} else {
h.ServeHTTP(gw, r)
}
} else {
h.ServeHTTP(newNoGzipResponseWriter(w), r)
w.Header().Del(HeaderNoCompression)
}
}
}, nil
}
// parsedContentType is the parsed representation of one of the inputs to
// ContentTypes or ExceptContentTypes: the two values returned by mime.ParseMediaType.
// See https://golang.org/pkg/mime/#ParseMediaType
type parsedContentType struct {
mediaType string
params map[string]string
}
// equals reports whether pct matches the given media type and parameters.
//
// The media types must be identical. If pct carries no parameters, the other
// side's parameters are irrelevant; otherwise both parameter sets must contain
// exactly the same keys and values.
func (pct parsedContentType) equals(mediaType string, params map[string]string) bool {
	switch {
	case pct.mediaType != mediaType:
		return false
	case len(pct.params) == 0:
		// No parameters configured: don't care about the other side's.
		return true
	case len(pct.params) != len(params):
		return false
	}
	for key, want := range pct.params {
		got, present := params[key]
		if !present || got != want {
			return false
		}
	}
	return true
}
// config collects the settings assembled by the option functions. NewWrapper
// copies them into each GzipResponseWriter it hands out.
type config struct {
minSize int
level int
writer writer.GzipWriterFactory
contentTypes func(ct string) bool
keepAcceptRanges bool
setContentType bool
suffixETag string
dropETag bool
}
// validate checks the assembled configuration.
//
// It returns an error when the compression level lies outside the range
// reported by the configured writer factory, or when the minimum size is
// negative. A minimum size of zero is valid.
func (c *config) validate() error {
	lo, hi := c.writer.Levels()
	if c.level < lo || c.level > hi {
		return fmt.Errorf("invalid compression level requested: %d, valid range %d -> %d", c.level, lo, hi)
	}
	if c.minSize < 0 {
		return fmt.Errorf("minimum size must not be negative, got %d", c.minSize)
	}
	return nil
}
// option is a functional option: it modifies the config that NewWrapper is assembling.
type option func(c *config)
// MinSize sets the minimum response size, in bytes, used to decide whether a
// response is compressed. NewWrapper rejects negative values.
func MinSize(size int) option {
	return option(func(cfg *config) {
		cfg.minSize = size
	})
}
// CompressionLevel sets the compression level. NewWrapper rejects levels
// outside the range reported by the configured writer implementation.
func CompressionLevel(level int) option {
	return option(func(cfg *config) {
		cfg.level = level
	})
}
// SetContentType controls whether a detected content type is written to the
// Content-Type header when the handler did not set that header itself.
// Default: true.
func SetContentType(b bool) option {
	return option(func(cfg *config) {
		cfg.setContentType = b
	})
}
// Implementation replaces the factory used to create gzip writers.
//
// When this option is not given, NewWrapper uses the gzkp factory
// (gzkp.Levels and gzkp.NewWriter).
func Implementation(writer writer.GzipWriterFactory) option {
	return option(func(cfg *config) {
		cfg.writer = writer
	})
}
// ContentTypes restricts compression to responses whose Content-Type matches
// one of the given types. If none match, the response is returned as-is.
//
// Content types are compared in a case-insensitive, whitespace-ignored
// manner.
//
// A MIME type without any other directive matches every content type with the
// same MIME type, whatever its other directives. I.e., "text/html" matches
// both "text/html" and "text/html; charset=utf-8".
//
// A MIME type with other directives only matches a content type with the same
// MIME type and the same directives. I.e., "text/html; charset=utf-8" only
// matches "text/html; charset=utf-8".
//
// Entries that cannot be parsed as a media type are skipped. If no usable
// entry remains, every content type is accepted.
//
// Without this option, DefaultContentTypeFilter is used, which excludes common
// compressed audio, video and archive formats.
//
// Setting this will override default and any previous Content Type settings.
func ContentTypes(types []string) option {
	return func(c *config) {
		var allowed []parsedContentType
		for _, raw := range types {
			mediaType, params, err := mime.ParseMediaType(raw)
			if err != nil {
				continue
			}
			allowed = append(allowed, parsedContentType{mediaType, params})
		}
		c.contentTypes = func(ct string) bool {
			return handleContentType(allowed, ct)
		}
	}
}
// ExceptContentTypes excludes responses whose Content-Type matches one of the
// given types from compression. If none match, the response is compressed.
//
// Content types are compared in a case-insensitive, whitespace-ignored
// manner.
//
// A MIME type without any other directive matches every content type with the
// same MIME type, whatever its other directives. I.e., "text/html" matches
// both "text/html" and "text/html; charset=utf-8".
//
// A MIME type with other directives only matches a content type with the same
// MIME type and the same directives. I.e., "text/html; charset=utf-8" only
// matches "text/html; charset=utf-8".
//
// Entries that cannot be parsed as a media type are skipped. If no usable
// entry remains, nothing is excluded and every content type is compressed.
//
// Without this option, DefaultContentTypeFilter is used, which excludes common
// compressed audio, video and archive formats.
//
// Setting this will override default and any previous Content Type settings.
func ExceptContentTypes(types []string) option {
	return func(c *config) {
		var excluded []parsedContentType
		for _, v := range types {
			mediaType, params, err := mime.ParseMediaType(v)
			if err == nil {
				excluded = append(excluded, parsedContentType{mediaType, params})
			}
		}
		c.contentTypes = func(ct string) bool {
			// handleContentType treats an empty list as "matches everything",
			// which, negated, would disable compression entirely. An empty
			// exclusion list must exclude nothing.
			if len(excluded) == 0 {
				return true
			}
			return !handleContentType(excluded, ct)
		}
	}
}
// KeepAcceptRanges keeps the Accept-Ranges header on gzipped responses instead
// of deleting it.
// This will likely break ranged requests since they cannot be handled
// transparently by the filter.
func KeepAcceptRanges() option {
	return option(func(cfg *config) {
		cfg.keepAcceptRanges = true
	})
}
// ContentTypeFilter installs a custom content type filter.
//
// The supplied function returns true when content of the given type should be
// compressed and false when it should not.
//
// The content type 'ct' is passed unparsed. It may have been set by the
// handler or auto-detected.
//
// Setting this will override default and any previous Content Type settings.
func ContentTypeFilter(compress func(ct string) bool) option {
	return option(func(cfg *config) {
		cfg.contentTypes = compress
	})
}
// SuffixETag appends the given suffix to the ETag header (if present) of
// responses that are compressed.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// A suffix such as "-gzip" is sometimes used as a workaround for generating a
// unique new ETag (see https://bz.apache.org/bugzilla/show_bug.cgi?id=39727).
func SuffixETag(suffix string) option {
	return option(func(cfg *config) {
		cfg.suffixETag = suffix
	})
}
// DropETag removes the ETag header from responses that are compressed. When
// combined with SuffixETag, DropETag wins and the ETag is dropped.
//
// Per [RFC 7232 Section 2.3.3](https://www.rfc-editor.org/rfc/rfc7232#section-2.3.3),
// the ETag of a compressed response must differ from its uncompressed version.
//
// This workaround eliminates ETag conflicts between the compressed and
// uncompressed versions by removing the ETag from the compressed version.
func DropETag() option {
	return option(func(cfg *config) {
		cfg.dropETag = true
	})
}
// acceptsGzip reports whether the request indicates that a gzipped response is
// acceptable, i.e. its Accept-Encoding header lists gzip with a qvalue above
// zero.
//
// HEAD requests are never compressed, due to a bug in nginx:
//
//	https://trac.nginx.org/nginx/ticket/358
//	https://golang.org/issue/5522
func acceptsGzip(r *http.Request) bool {
	if r.Method == http.MethodHead {
		return false
	}
	return parseEncodingGzip(r.Header.Get(acceptEncoding)) > 0
}
// handleContentType reports whether ct matches one of the configured content
// types. An empty list matches every content type; a ct that cannot be parsed
// as a media type matches none.
func handleContentType(contentTypes []parsedContentType, ct string) bool {
	if len(contentTypes) == 0 {
		return true
	}
	mediaType, params, err := mime.ParseMediaType(ct)
	if err != nil {
		return false
	}
	matched := false
	for i := range contentTypes {
		if contentTypes[i].equals(mediaType, params) {
			matched = true
			break
		}
	}
	return matched
}
// parseEncodingGzip scans a comma-separated Accept-Encoding value and returns
// the qvalue of the first "gzip" entry, or 0 when gzip is not listed. Parse
// errors of individual entries are ignored.
func parseEncodingGzip(s string) float64 {
	rest := strings.TrimSpace(s)
	for rest != "" {
		// Split off the next entry; without a comma the whole rest is the
		// last entry.
		entry, next := rest, ""
		if comma := strings.IndexByte(rest, ','); comma >= 0 {
			entry, next = rest[:comma], rest[comma+1:]
		}
		if coding, qvalue, _ := parseCoding(entry); coding == "gzip" {
			return qvalue
		}
		rest = next
	}
	return 0
}
// parseEncodings parses a comma-separated list of codings into a map from
// coding name to qvalue.
//
// Entries that fail to parse are left out of the map; their error messages are
// combined into the returned error. The map with the successfully parsed
// entries is returned in either case.
func parseEncodings(s string) (codings, error) {
	parts := strings.Split(s, ",")
	parsed := make(codings, len(parts))
	var problems []string
	for _, part := range parts {
		coding, qvalue, err := parseCoding(part)
		if err != nil {
			problems = append(problems, err.Error())
			continue
		}
		parsed[coding] = qvalue
	}
	if len(problems) == 0 {
		return parsed, nil
	}
	// TODO (adammck): Use a proper multi-error struct, so the individual errors
	// can be extracted if anyone cares.
	return parsed, fmt.Errorf("errors while parsing encodings: %s", strings.Join(problems, ", "))
}
// parseCoding parses a single coding (content-coding with an optional qvalue),
// as might appear in an Accept-Encoding header. It attempts to forgive minor
// formatting errors.
//
// The coding name is returned lower-cased and trimmed. The qvalue is
// DefaultQValue unless a "q=" parameter is present, in which case it is the
// parsed value clamped to the range [0, 1]. Parameters other than "q=" are
// ignored and do not affect the qvalue.
//
// An error is returned when the qvalue cannot be parsed or when the coding
// name is empty.
func parseCoding(s string) (coding string, qvalue float64, err error) {
	// Set the default once, before the loop: resetting it for every part would
	// let any parameter that follows "q=" silently undo an explicit qvalue
	// (for instance turning "gzip;q=0;x" into an accepted encoding).
	qvalue = DefaultQValue
	for n, part := range strings.Split(s, ";") {
		part = strings.TrimSpace(part)
		if n == 0 {
			coding = strings.ToLower(part)
			continue
		}
		if !strings.HasPrefix(part, "q=") {
			continue
		}
		qvalue, err = strconv.ParseFloat(strings.TrimPrefix(part, "q="), 64)
		if qvalue < 0.0 {
			qvalue = 0.0
		} else if qvalue > 1.0 {
			qvalue = 1.0
		}
	}
	if coding == "" {
		err = fmt.Errorf("empty content-coding")
	}
	return coding, qvalue, err
}
// excludePrefixDefault lists the content type prefixes that DefaultContentTypeFilter
// never compresses: audio and video types, plus types starting with "image/jp".
var excludePrefixDefault = []string{"video/", "audio/", "image/jp"}
// excludeContainsDefault lists substrings that make DefaultContentTypeFilter skip a content type,
// covering a bunch of already compressed formats.
// Curated by supposedly still active formats on https://en.wikipedia.org/wiki/List_of_archive_formats
var excludeContainsDefault = []string{"compress", "zip", "snappy", "lzma", "xz", "zstd", "brotli", "stuffit"}
// DefaultContentTypeFilter reports whether content of type ct should be
// compressed. It excludes common compressed audio, video and archive formats.
//
// The content type is lower-cased and trimmed first. An empty content type is
// accepted; otherwise it is rejected if it contains any entry of
// excludeContainsDefault or starts with any entry of excludePrefixDefault.
func DefaultContentTypeFilter(ct string) bool {
	normalized := strings.TrimSpace(strings.ToLower(ct))
	if normalized == "" {
		return true
	}
	for i := range excludeContainsDefault {
		if strings.Contains(normalized, excludeContainsDefault[i]) {
			return false
		}
	}
	for i := range excludePrefixDefault {
		if strings.HasPrefix(normalized, excludePrefixDefault[i]) {
			return false
		}
	}
	return true
}
// CompressAllContentTypeFilter is a content type filter that accepts every
// MIME type: it returns true regardless of ct.
func CompressAllContentTypeFilter(ct string) bool {
return true
}
// intSize is the size of an int in bits: 32 or 64, depending on the platform.
const intSize = 32 << (^uint(0) >> 63)
// atoi is equivalent to ParseInt(s, 10, 0), converted to type int. Instead of
// an error it returns a flag that is true when the whole string was a valid
// decimal integer that fits into an int.
func atoi(s string) (int, bool) {
	// Longest input that is guaranteed not to overflow an int.
	maxFastLen := 18
	if intSize == 32 {
		maxFastLen = 9
	}

	if n := len(s); n == 0 || n > maxFastLen {
		// Slow path for invalid, big, or underscored integers.
		i64, err := strconv.ParseInt(s, 10, 0)
		return int(i64), err == nil
	}

	// Fast path for small integers that fit int type.
	digits := s
	negative := s[0] == '-'
	if negative || s[0] == '+' {
		digits = s[1:]
		if digits == "" {
			return 0, false
		}
	}
	value := 0
	for i := 0; i < len(digits); i++ {
		// Anything below '0' wraps around to a large byte value, so a single
		// comparison rejects every non-digit.
		d := digits[i] - '0'
		if d > 9 {
			return 0, false
		}
		value = value*10 + int(d)
	}
	if negative {
		value = -value
	}
	return value, true
}
// newNoGzipResponseWriter wraps w in a NoGzipResponseWriter, which cleans up
// compression artifacts.
//
// If w implements http.Hijacker, the returned writer does so as well: the
// wrapper is combined with w's Hijacker and the wrapper's own Flush method in
// an anonymous struct. Otherwise the plain wrapper is returned.
func newNoGzipResponseWriter(w http.ResponseWriter) http.ResponseWriter {
	n := &NoGzipResponseWriter{ResponseWriter: w}
	hj, canHijack := w.(http.Hijacker)
	if !canHijack {
		return n
	}
	return struct {
		http.ResponseWriter
		http.Hijacker
		http.Flusher
	}{
		ResponseWriter: n,
		Hijacker:       hj,
		Flusher:        n,
	}
}
// NoGzipResponseWriter filters out HeaderNoCompression.
// The header is deleted from the wrapped writer on the first call to Flush,
// Write or WriteHeader.
type NoGzipResponseWriter struct {
http.ResponseWriter
hdrCleaned bool // Set once HeaderNoCompression has been deleted.
}
// CloseNotify forwards to the wrapped ResponseWriter when it implements
// http.CloseNotifier. Otherwise it returns a nil channel.
func (n *NoGzipResponseWriter) CloseNotify() <-chan bool {
	cn, ok := n.ResponseWriter.(http.CloseNotifier)
	if !ok {
		return nil
	}
	return cn.CloseNotify()
}
// Flush removes HeaderNoCompression from the response headers if that has not
// happened yet, then flushes the wrapped ResponseWriter when it implements
// http.Flusher.
func (n *NoGzipResponseWriter) Flush() {
	inner := n.ResponseWriter
	if !n.hdrCleaned {
		inner.Header().Del(HeaderNoCompression)
		n.hdrCleaned = true
	}
	flusher, ok := inner.(http.Flusher)
	if !ok {
		return
	}
	flusher.Flush()
}
// Header returns the header map of the wrapped ResponseWriter.
func (n *NoGzipResponseWriter) Header() http.Header {
	hdr := n.ResponseWriter.Header()
	return hdr
}
// Write removes HeaderNoCompression from the response headers if that has not
// happened yet, then passes bytes to the wrapped ResponseWriter and returns
// its result.
func (n *NoGzipResponseWriter) Write(bytes []byte) (int, error) {
	inner := n.ResponseWriter
	if !n.hdrCleaned {
		inner.Header().Del(HeaderNoCompression)
		n.hdrCleaned = true
	}
	written, err := inner.Write(bytes)
	return written, err
}
// WriteHeader removes HeaderNoCompression from the response headers if that
// has not happened yet, then forwards statusCode to the wrapped
// ResponseWriter.
func (n *NoGzipResponseWriter) WriteHeader(statusCode int) {
	inner := n.ResponseWriter
	if !n.hdrCleaned {
		inner.Header().Del(HeaderNoCompression)
		n.hdrCleaned = true
	}
	inner.WriteHeader(statusCode)
}