minio/internal/etag/etag.go

420 lines
13 KiB
Go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// Package etag provides an implementation of S3 ETags.
//
// Each S3 object has an associated ETag that can be
// used to e.g. quickly compare objects or check whether
// the content of an object has changed.
//
// In general, an S3 ETag is an MD5 checksum of the object
// content. However, there are many exceptions to this rule.
//
//
// Single-part Upload
//
// In case of a basic single-part PUT operation - without server
// side encryption or object compression - the ETag of an object
// is its content MD5.
//
//
// Multi-part Upload
//
// The ETag of an object does not correspond to its content MD5
// when the object is uploaded in multiple parts via the S3
// multipart API. Instead, S3 first computes a MD5 of each part:
// e1 := MD5(part-1)
// e2 := MD5(part-2)
// ...
// eN := MD5(part-N)
//
// Then, the ETag of the object is computed as MD5 of all individual
// part checksums. S3 also encodes the number of parts into the ETag
// by appending a -<number-of-parts> at the end:
// ETag := MD5(e1 || e2 || e3 ... || eN) || -N
//
// For example: ceb8853ddc5086cc4ab9e149f8f09c88-5
//
// However, this scheme is only used for multipart objects that are
// not encrypted.
//
// Server-side Encryption
//
// S3 specifies three types of server-side-encryption - SSE-C, SSE-S3
// and SSE-KMS - with different semantics w.r.t. ETags.
// In case of SSE-S3, the ETag of an object is computed the same as
// for single resp. multipart plaintext objects. In particular,
// the ETag of a singlepart SSE-S3 object is its content MD5.
//
// In case of SSE-C and SSE-KMS, the ETag of an object is computed
// differently. For singlepart uploads the ETag is not the content
// MD5 of the object. For multipart uploads the ETag is also not
// the MD5 of the individual part checksums but it still contains
// the number of parts as suffix.
//
// Instead, the ETag is kind of unpredictable for S3 clients when
// an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3
// computes the ETag as MD5 of the encrypted content but there is
// no way to verify this assumption since the encryption happens
// inside AWS S3.
// Therefore, S3 clients must not make any assumption about ETags
// in case of SSE-C or SSE-KMS except that the ETag is well-formed.
//
// To put all of this into a simple rule:
// SSE-S3 : ETag == MD5
// SSE-C : ETag != MD5
// SSE-KMS: ETag != MD5
//
//
// Encrypted ETags
//
// An S3 implementation has to remember the content MD5 of objects
// in case of SSE-S3. However, storing the ETag of an encrypted
// object in plaintext may reveal some information about the object.
// For example, two objects with the same ETag are identical with
// a very high probability.
//
// Therefore, an S3 implementation may encrypt an ETag before storing
// it. In this case, the stored ETag may not be a well-formed S3 ETag.
// For example, it can be larger due to a checksum added by authenticated
// encryption schemes. Such an ETag must be decrypted before sent to an
// S3 client.
//
//
// S3 Clients
//
// There are many different S3 client implementations. Most of them
// access the ETag by looking for the HTTP response header key "Etag".
// However, some of them assume that the header key has to be "ETag"
// (case-sensitive) and will fail otherwise.
// Further, some clients require that the ETag value is a double-quoted
// string. Therefore, this package provides dedicated functions for
// adding and extracing the ETag to/from HTTP headers.
package etag
import (
"bytes"
"crypto/hmac"
"crypto/md5"
"encoding/base64"
"encoding/hex"
"errors"
"fmt"
"net/http"
"strconv"
"strings"
"github.com/minio/minio/internal/fips"
"github.com/minio/minio/internal/hash/sha256"
"github.com/minio/sio"
)
// ETag is a single S3 ETag.
//
// An S3 ETag sometimes corresponds to the MD5 of
// the S3 object content. However, when an object
// is encrypted, compressed or uploaded using
// the S3 multipart API then its ETag is not
// necessarily the MD5 of the object content.
//
// For a more detailed description of S3 ETags
// take a look at the package documentation.
type ETag []byte
// String returns the string representation of the ETag.
//
// The returned string is a hex representation of the
// binary ETag with an optional '-<part-number>' suffix.
func (e ETag) String() string {
if e.IsMultipart() {
return hex.EncodeToString(e[:16]) + string(e[16:])
}
return hex.EncodeToString(e)
}
// IsEncrypted reports whether the ETag is encrypted.
func (e ETag) IsEncrypted() bool {
// An encrypted ETag must be at least 32 bytes long.
// It contains the encrypted ETag value + an authentication
// code generated by the AEAD cipher.
//
// Here is an incorrect implementation of IsEncrypted:
//
// return len(e) > 16 && !bytes.ContainsRune(e, '-')
//
// An encrypted ETag may contain some random bytes - e.g.
// and nonce value. This nonce value may contain a '-'
// just by its nature of being randomly generated.
// The above implementation would incorrectly consider
// such an ETag (with a nonce value containing a '-')
// as non-encrypted.
return len(e) >= 32 // We consider all ETags longer than 32 bytes as encrypted
}
// IsMultipart reports whether the ETag belongs to an
// object that has been uploaded using the S3 multipart
// API.
// An S3 multipart ETag has a -<part-number> suffix.
func (e ETag) IsMultipart() bool {
return len(e) > 16 && !e.IsEncrypted() && bytes.ContainsRune(e, '-')
}
// Parts returns the number of object parts that are
// referenced by this ETag. It returns 1 if the object
// has been uploaded using the S3 singlepart API.
//
// Parts may panic if the ETag is an invalid multipart
// ETag.
func (e ETag) Parts() int {
if !e.IsMultipart() {
return 1
}
n := bytes.IndexRune(e, '-')
parts, err := strconv.Atoi(string(e[n+1:]))
if err != nil {
panic(err) // malformed ETag
}
return parts
}
// Format returns an ETag that is formatted as specified
// by AWS S3.
//
// An AWS S3 ETag is 16 bytes long and, in case of a multipart
// upload, has a `-N` suffix encoding the number of object parts.
// An ETag is not AWS S3 compatible when encrypted. When sending
// an ETag back to an S3 client it has to be formatted to be
// AWS S3 compatible.
//
// Therefore, Format returns the last 16 bytes of an encrypted
// ETag.
//
// In general, a caller has to distinguish the following cases:
// - The object is a multipart object. In this case,
// Format returns the ETag unmodified.
// - The object is a SSE-KMS or SSE-C encrypted single-
// part object. In this case, Format returns the last
// 16 bytes of the encrypted ETag which will be a random
// value.
// - The object is a SSE-S3 encrypted single-part object.
// In this case, the caller has to decrypt the ETag first
// before calling Format.
// S3 clients expect that the ETag of an SSE-S3 encrypted
// single-part object is equal to the object's content MD5.
// Formatting the SSE-S3 ETag before decryption will result
// in a random-looking ETag which an S3 client will not accept.
//
// Hence, a caller has to check:
// if method == SSE-S3 {
// ETag, err := Decrypt(key, ETag)
// if err != nil {
// }
// }
// ETag = ETag.Format()
func (e ETag) Format() ETag {
if !e.IsEncrypted() {
return e
}
return e[len(e)-16:]
}
var _ Tagger = ETag{} // compiler check
// ETag returns the ETag itself.
//
// By providing this method ETag implements
// the Tagger interface.
func (e ETag) ETag() ETag { return e }
// FromContentMD5 decodes and returns the Content-MD5
// as ETag, if set. If no Content-MD5 header is set
// it returns an empty ETag and no error.
func FromContentMD5(h http.Header) (ETag, error) {
v, ok := h["Content-Md5"]
if !ok {
return nil, nil
}
if v[0] == "" {
return nil, errors.New("etag: content-md5 is set but contains no value")
}
b, err := base64.StdEncoding.Strict().DecodeString(v[0])
if err != nil {
return nil, err
}
if len(b) != md5.Size {
return nil, errors.New("etag: invalid content-md5")
}
return ETag(b), nil
}
// Multipart computes an S3 multipart ETag given a list of
// S3 singlepart ETags. It returns nil if the list of
// ETags is empty.
//
// Any encrypted or multipart ETag will be ignored and not
// used to compute the returned ETag.
func Multipart(etags ...ETag) ETag {
if len(etags) == 0 {
return nil
}
var n int64
h := md5.New()
for _, etag := range etags {
if !etag.IsMultipart() && !etag.IsEncrypted() {
h.Write(etag)
n++
}
}
etag := append(h.Sum(nil), '-')
return strconv.AppendInt(etag, n, 10)
}
// Set adds the ETag to the HTTP headers. It overwrites any
// existing ETag entry.
//
// Due to legacy S3 clients, that make incorrect assumptions
// about HTTP headers, Set should be used instead of
// http.Header.Set(...). Otherwise, some S3 clients will not
// able to extract the ETag.
func Set(etag ETag, h http.Header) {
// Some (broken) S3 clients expect the ETag header to
// literally "ETag" - not "Etag". Further, some clients
// expect an ETag in double quotes. Therefore, we set the
// ETag directly as map entry instead of using http.Header.Set
h["ETag"] = []string{`"` + etag.String() + `"`}
}
// Get extracts and parses an ETag from the given HTTP headers.
// It returns an error when the HTTP headers do not contain
// an ETag entry or when the ETag is malformed.
//
// Get only accepts AWS S3 compatible ETags - i.e. no
// encrypted ETags - and therefore is stricter than Parse.
func Get(h http.Header) (ETag, error) {
const strict = true
if v := h.Get("Etag"); v != "" {
return parse(v, strict)
}
v, ok := h["ETag"]
if !ok || len(v) == 0 {
return nil, errors.New("etag: HTTP header does not contain an ETag")
}
return parse(v[0], strict)
}
// Equal returns true if and only if the two ETags are
// identical.
func Equal(a, b ETag) bool { return bytes.Equal(a, b) }
// Decrypt decrypts the ETag with the given key.
//
// If the ETag is not encrypted, Decrypt returns
// the ETag unmodified.
func Decrypt(key []byte, etag ETag) (ETag, error) {
const HMACContext = "SSE-etag"
if !etag.IsEncrypted() {
return etag, nil
}
mac := hmac.New(sha256.New, key)
mac.Write([]byte(HMACContext))
decryptionKey := mac.Sum(nil)
plaintext := make([]byte, 0, 16)
etag, err := sio.DecryptBuffer(plaintext, etag, sio.Config{
Key: decryptionKey,
CipherSuites: fips.CipherSuitesDARE(),
})
if err != nil {
return nil, err
}
return etag, nil
}
// Parse parses s as an S3 ETag, returning the result.
// The string can be an encrypted, singlepart
// or multipart S3 ETag. It returns an error if s is
// not a valid textual representation of an ETag.
func Parse(s string) (ETag, error) {
const strict = false
return parse(s, strict)
}
// parse parse s as an S3 ETag, returning the result.
// It operates in one of two modes:
// - strict
// - non-strict
//
// In strict mode, parse only accepts ETags that
// are AWS S3 compatible. In particular, an AWS
// S3 ETag always consists of a 128 bit checksum
// value and an optional -<part-number> suffix.
// Therefore, s must have the following form in
// strict mode: <32-hex-characters>[-<integer>]
//
// In non-strict mode, parse also accepts ETags
// that are not AWS S3 compatible - e.g. encrypted
// ETags.
func parse(s string, strict bool) (ETag, error) {
// An S3 ETag may be a double-quoted string.
// Therefore, we remove double quotes at the
// start and end, if any.
if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) {
s = s[1 : len(s)-1]
}
// An S3 ETag may be a multipart ETag that
// contains a '-' followed by a number.
// If the ETag does not a '-' is is either
// a singlepart or encrypted ETag.
n := strings.IndexRune(s, '-')
if n == -1 {
etag, err := hex.DecodeString(s)
if err != nil {
return nil, err
}
if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long
return nil, fmt.Errorf("etag: invalid length %d", len(etag))
}
return ETag(etag), nil
}
prefix, suffix := s[:n], s[n:]
if len(prefix) != 32 {
return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix))
}
if len(suffix) <= 1 {
return nil, errors.New("etag: suffix is not a part number")
}
etag, err := hex.DecodeString(prefix)
if err != nil {
return nil, err
}
partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1]
if err != nil {
return nil, err
}
if strict && (partNumber == 0 || partNumber > 10000) {
return nil, fmt.Errorf("etag: invalid part number %d", partNumber)
}
return ETag(append(etag, suffix...)), nil
}