make renameData() more defensive during overwrites (#19548)

instead upon any error in renameData(), we still
preserve the existing dataDir in some form for
recoverability in strange situations such as out
of disk space type errors.

Bonus: avoid running list and heal() instead allow
versions disparity to return the actual versions,
uuid to heal. Currently limit this to 100 versions
and lesser disparate objects.

an undo now reverts back the xl.meta from xl.meta.bkp
during overwrites on such flaky setups.

Bonus: Save N depth syscalls via skipping the parents
upon overwrites and versioned updates.

Flaky setup examples are stretch clusters with regular
packet drops etc, we need to add some defensive code
around to avoid dangling objects.
This commit is contained in:
Harshavardhana 2024-04-23 10:15:52 -07:00 committed by GitHub
parent ee1047bd52
commit 9693c382a8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
22 changed files with 460 additions and 282 deletions

View File

@ -24,10 +24,7 @@ if [ ! -f ./mc ]; then
chmod +x mc
fi
(
cd ./docs/debugging/s3-check-md5
go install -v
)
go install -v github.com/minio/minio/docs/debugging/s3-check-md5@latest
export RELEASE=RELEASE.2023-08-29T23-07-35Z

View File

@ -87,10 +87,7 @@ function verify_rewrite() {
exit 1
fi
(
cd ./docs/debugging/s3-check-md5
go install -v
)
go install -v github.com/minio/minio/docs/debugging/s3-check-md5@latest
if ! s3-check-md5 \
-debug \

View File

@ -19,6 +19,7 @@ package cmd
import (
"context"
"encoding/binary"
"errors"
"hash/crc32"
@ -26,14 +27,17 @@ import (
)
// figure out the most commonVersions across disk that satisfies
// the 'writeQuorum' this function returns '0' if quorum cannot
// the 'writeQuorum' this function returns "" if quorum cannot
// be achieved and disks have too many inconsistent versions.
func reduceCommonVersions(diskVersions []uint64, writeQuorum int) (commonVersions uint64) {
func reduceCommonVersions(diskVersions [][]byte, writeQuorum int) (versions []byte) {
diskVersionsCount := make(map[uint64]int)
for _, versions := range diskVersions {
diskVersionsCount[versions]++
if len(versions) > 0 {
diskVersionsCount[binary.BigEndian.Uint64(versions)]++
}
}
var commonVersions uint64
max := 0
for versions, count := range diskVersionsCount {
if max < count {
@ -43,10 +47,38 @@ func reduceCommonVersions(diskVersions []uint64, writeQuorum int) (commonVersion
}
if max >= writeQuorum {
return commonVersions
for _, versions := range diskVersions {
if binary.BigEndian.Uint64(versions) == commonVersions {
return versions
}
}
}
return 0
return []byte{}
}
// figure out the most commonVersions across disk that satisfies
// the 'writeQuorum' this function returns '0' if quorum cannot
// be achieved and disks have too many inconsistent versions.
func reduceCommonDataDir(dataDirs []string, writeQuorum int) (dataDir string) {
dataDirsCount := make(map[string]int)
for _, ddir := range dataDirs {
dataDirsCount[ddir]++
}
max := 0
for ddir, count := range dataDirsCount {
if max < count {
max = count
dataDir = ddir
}
}
if max >= writeQuorum {
return dataDir
}
return ""
}
// Returns number of errors that occurred the most (incl. nil) and the

View File

@ -1311,24 +1311,28 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str
}()
// Rename the multipart object to final location.
onlineDisks, versionsDisparity, err := renameData(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath,
onlineDisks, versions, oldDataDir, err := renameData(ctx, onlineDisks, minioMetaMultipartBucket, uploadIDPath,
partsMetadata, bucket, object, writeQuorum)
if err != nil {
return oi, toObjectErr(err, bucket, object)
}
if !opts.Speedtest && versionsDisparity {
if err = er.commitRenameDataDir(ctx, bucket, object, oldDataDir, onlineDisks); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
if !opts.Speedtest && len(versions) > 0 {
globalMRFState.addPartialOp(partialOperation{
bucket: bucket,
object: object,
queued: time.Now(),
allVersions: true,
setIndex: er.setIndex,
poolIndex: er.poolIndex,
bucket: bucket,
object: object,
queued: time.Now(),
versions: versions,
setIndex: er.setIndex,
poolIndex: er.poolIndex,
})
}
if !opts.Speedtest && !versionsDisparity {
if !opts.Speedtest && len(versions) == 0 {
// Check if there is any offline disk and add it to the MRF list
for _, disk := range onlineDisks {
if disk != nil && disk.IsOnline() {

View File

@ -48,7 +48,6 @@ import (
"github.com/minio/minio/internal/logger"
"github.com/minio/pkg/v2/mimedb"
"github.com/minio/pkg/v2/sync/errgroup"
"github.com/minio/pkg/v2/wildcard"
)
// list all errors which can be ignored in object operations.
@ -1006,7 +1005,7 @@ func (er erasureObjects) getObjectInfoAndQuorum(ctx context.Context, bucket, obj
}
// Similar to rename but renames data from srcEntry to dstEntry at dataDir
func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry string, metadata []FileInfo, dstBucket, dstEntry string, writeQuorum int) ([]StorageAPI, bool, error) {
func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry string, metadata []FileInfo, dstBucket, dstEntry string, writeQuorum int) ([]StorageAPI, []byte, string, error) {
g := errgroup.WithNErrs(len(disks))
fvID := mustGetUUID()
@ -1014,7 +1013,8 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str
metadata[index].SetTierFreeVersionID(fvID)
}
diskVersions := make([]uint64, len(disks))
diskVersions := make([][]byte, len(disks))
dataDirs := make([]string, len(disks))
// Rename file on all underlying storage disks.
for index := range disks {
index := index
@ -1033,11 +1033,12 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str
if !fi.IsValid() {
return errFileCorrupt
}
sign, err := disks[index].RenameData(ctx, srcBucket, srcEntry, fi, dstBucket, dstEntry, RenameOptions{})
resp, err := disks[index].RenameData(ctx, srcBucket, srcEntry, fi, dstBucket, dstEntry, RenameOptions{})
if err != nil {
return err
}
diskVersions[index] = sign
diskVersions[index] = resp.Sign
dataDirs[index] = resp.OldDataDir
return nil
}, index)
}
@ -1045,8 +1046,6 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str
// Wait for all renames to finish.
errs := g.Wait()
var versionsDisparity bool
err := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
if err != nil {
dg := errgroup.WithNErrs(len(disks))
@ -1060,27 +1059,35 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry str
// caller this dangling object will be now scheduled to be removed
// via active healing.
dg.Go(func() error {
return disks[index].DeleteVersion(context.Background(), dstBucket, dstEntry, metadata[index], false, DeleteOptions{UndoWrite: true})
return disks[index].DeleteVersion(context.Background(), dstBucket, dstEntry, metadata[index], false, DeleteOptions{
UndoWrite: true,
OldDataDir: dataDirs[index],
})
}, index)
}
dg.Wait()
}
var dataDir string
var versions []byte
if err == nil {
versions := reduceCommonVersions(diskVersions, writeQuorum)
versions = reduceCommonVersions(diskVersions, writeQuorum)
for index, dversions := range diskVersions {
if errs[index] != nil {
continue
}
if versions != dversions {
versionsDisparity = true
if !bytes.Equal(dversions, versions) {
if len(dversions) > len(versions) {
versions = dversions
}
break
}
}
dataDir = reduceCommonDataDir(dataDirs, writeQuorum)
}
// We can safely allow RenameData errors up to len(er.getDisks()) - writeQuorum
// otherwise return failure.
return evalDisks(disks, errs), versionsDisparity, err
return evalDisks(disks, errs), versions, dataDir, err
}
func (er erasureObjects) putMetacacheObject(ctx context.Context, key string, r *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error) {
@ -1229,44 +1236,6 @@ func (er erasureObjects) PutObject(ctx context.Context, bucket string, object st
return er.putObject(ctx, bucket, object, data, opts)
}
// Heal up to two versions of one object when there is disparity between disks
func healObjectVersionsDisparity(bucket string, entry metaCacheEntry, scanMode madmin.HealScanMode) error {
if entry.isDir() {
return nil
}
// We might land at .metacache, .trash, .multipart
// no need to heal them skip, only when bucket
// is '.minio.sys'
if bucket == minioMetaBucket {
if wildcard.Match("buckets/*/.metacache/*", entry.name) {
return nil
}
if wildcard.Match("tmp/*", entry.name) {
return nil
}
if wildcard.Match("multipart/*", entry.name) {
return nil
}
if wildcard.Match("tmp-old/*", entry.name) {
return nil
}
}
fivs, err := entry.fileInfoVersions(bucket)
if err != nil {
healObject(bucket, entry.name, "", madmin.HealDeepScan)
return err
}
if len(fivs.Versions) <= 2 {
for _, version := range fivs.Versions {
healObject(bucket, entry.name, version.VersionID, scanMode)
}
}
return nil
}
// putObject wrapper for erasureObjects PutObject
func (er erasureObjects) putObject(ctx context.Context, bucket string, object string, r *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error) {
if !opts.NoAuditLog {
@ -1547,7 +1516,7 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
}
// Rename the successfully written temporary object to final location.
onlineDisks, versionsDisparity, err := renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum)
onlineDisks, versions, oldDataDir, err := renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum)
if err != nil {
if errors.Is(err, errFileNotFound) {
return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object)
@ -1555,6 +1524,10 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
if err = er.commitRenameDataDir(ctx, bucket, object, oldDataDir, onlineDisks); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
for i := 0; i < len(onlineDisks); i++ {
if onlineDisks[i] != nil && onlineDisks[i].IsOnline() {
// Object info is the same in all disks, so we can pick
@ -1569,7 +1542,7 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
// When there is versions disparity we are healing
// the content implicitly for all versions, we can
// avoid triggering another MRF heal for offline drives.
if !versionsDisparity {
if len(versions) == 0 {
// Whether a disk was initially or becomes offline
// during this upload, send it to the MRF list.
for i := 0; i < len(onlineDisks); i++ {
@ -1582,12 +1555,12 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
}
} else {
globalMRFState.addPartialOp(partialOperation{
bucket: bucket,
object: object,
queued: time.Now(),
allVersions: true,
setIndex: er.setIndex,
poolIndex: er.poolIndex,
bucket: bucket,
object: object,
queued: time.Now(),
versions: versions,
setIndex: er.setIndex,
poolIndex: er.poolIndex,
})
}
}
@ -1797,6 +1770,30 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
return dobjects, errs
}
func (er erasureObjects) commitRenameDataDir(ctx context.Context, bucket, object, dataDir string, onlineDisks []StorageAPI) error {
if dataDir == "" {
return nil
}
g := errgroup.WithNErrs(len(onlineDisks))
for index := range onlineDisks {
index := index
g.Go(func() error {
if onlineDisks[index] == nil {
return nil
}
return onlineDisks[index].Delete(ctx, bucket, pathJoin(object, dataDir), DeleteOptions{
Recursive: true,
})
}, index)
}
for _, err := range g.Wait() {
if err != nil {
return err
}
}
return nil
}
func (er erasureObjects) deletePrefix(ctx context.Context, bucket, prefix string) error {
disks := er.getDisks()
g := errgroup.WithNErrs(len(disks))

View File

@ -530,16 +530,7 @@ func healObject(bucket, object, versionID string, scan madmin.HealScanMode) erro
// Get background heal sequence to send elements to heal
bgSeq, ok := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID)
if ok {
return bgSeq.queueHealTask(healSource{
bucket: bucket,
object: object,
versionID: versionID,
noWait: true, // do not block callers.
opts: &madmin.HealOpts{
Remove: healDeleteDangling, // if found dangling purge it.
ScanMode: scan,
},
}, madmin.HealItemObject)
return bgSeq.healObject(bucket, object, versionID, scan)
}
return nil
}

View File

@ -21,6 +21,7 @@ import (
"context"
"time"
"github.com/google/uuid"
"github.com/minio/madmin-go/v3"
"github.com/minio/pkg/v2/wildcard"
)
@ -35,7 +36,7 @@ type partialOperation struct {
bucket string
object string
versionID string
allVersions bool
versions []byte
setIndex, poolIndex int
queued time.Time
scanMode madmin.HealScanMode
@ -111,8 +112,13 @@ func (m *mrfState) healRoutine(z *erasureServerPools) {
if u.object == "" {
healBucket(u.bucket, scan)
} else {
if u.allVersions {
z.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(u.bucket, u.object, u.scanMode, healObjectVersionsDisparity)
if len(u.versions) > 0 {
vers := len(u.versions) / 16
if vers > 0 {
for i := 0; i < vers; i++ {
healObject(u.bucket, u.object, uuid.UUID(u.versions[16*i:]).String(), scan)
}
}
} else {
healObject(u.bucket, u.object, u.versionID, scan)
}

View File

@ -201,9 +201,9 @@ func (d *naughtyDisk) AppendFile(ctx context.Context, volume string, path string
return d.disk.AppendFile(ctx, volume, path, buf)
}
func (d *naughtyDisk) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (uint64, error) {
func (d *naughtyDisk) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (RenameDataResp, error) {
if err := d.calcError(); err != nil {
return 0, err
return RenameDataResp{}, err
}
return d.disk.RenameData(ctx, srcVolume, srcPath, fi, dstVolume, dstPath, opts)
}

View File

@ -33,6 +33,8 @@ type DeleteOptions struct {
Recursive bool `msg:"r"`
Immediate bool `msg:"i"`
UndoWrite bool `msg:"u"`
// OldDataDir of the previous object
OldDataDir string `msg:"o,omitempty"` // old data dir used only when to revert a rename()
}
// BaseOptions represents common options for all Storage API calls
@ -490,8 +492,14 @@ type WriteAllHandlerParams struct {
}
// RenameDataResp - RenameData()'s response.
// Provides information about the final state of Rename()
// - on xl.meta (array of versions) on disk to check for version disparity
// - on rewrite dataDir on disk that must be additionally purged
// only after as a 2-phase call, allowing the older dataDir to
// hang-around in-case we need some form of recovery.
type RenameDataResp struct {
Signature uint64 `msg:"sig"`
Sign []byte
OldDataDir string // contains '<uuid>', it is designed to be passed as value to Delete(bucket, pathJoin(object, dataDir))
}
// LocalDiskIDs - GetLocalIDs response.

View File

@ -514,6 +514,12 @@ func (z *DeleteOptions) DecodeMsg(dc *msgp.Reader) (err error) {
err = msgp.WrapError(err, "UndoWrite")
return
}
case "o":
z.OldDataDir, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "OldDataDir")
return
}
default:
err = dc.Skip()
if err != nil {
@ -527,9 +533,24 @@ func (z *DeleteOptions) DecodeMsg(dc *msgp.Reader) (err error) {
// EncodeMsg implements msgp.Encodable
func (z *DeleteOptions) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 4
// check for omitted fields
zb0001Len := uint32(5)
var zb0001Mask uint8 /* 5 bits */
_ = zb0001Mask
if z.OldDataDir == "" {
zb0001Len--
zb0001Mask |= 0x10
}
// variable map header, size zb0001Len
err = en.Append(0x80 | uint8(zb0001Len))
if err != nil {
return
}
if zb0001Len == 0 {
return
}
// write "BaseOptions"
err = en.Append(0x84, 0xab, 0x42, 0x61, 0x73, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73)
err = en.Append(0xab, 0x42, 0x61, 0x73, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73)
if err != nil {
return
}
@ -569,15 +590,39 @@ func (z *DeleteOptions) EncodeMsg(en *msgp.Writer) (err error) {
err = msgp.WrapError(err, "UndoWrite")
return
}
if (zb0001Mask & 0x10) == 0 { // if not omitted
// write "o"
err = en.Append(0xa1, 0x6f)
if err != nil {
return
}
err = en.WriteString(z.OldDataDir)
if err != nil {
err = msgp.WrapError(err, "OldDataDir")
return
}
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *DeleteOptions) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 4
// check for omitted fields
zb0001Len := uint32(5)
var zb0001Mask uint8 /* 5 bits */
_ = zb0001Mask
if z.OldDataDir == "" {
zb0001Len--
zb0001Mask |= 0x10
}
// variable map header, size zb0001Len
o = append(o, 0x80|uint8(zb0001Len))
if zb0001Len == 0 {
return
}
// string "BaseOptions"
o = append(o, 0x84, 0xab, 0x42, 0x61, 0x73, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73)
o = append(o, 0xab, 0x42, 0x61, 0x73, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73)
// map header, size 0
_ = z.BaseOptions
o = append(o, 0x80)
@ -590,6 +635,11 @@ func (z *DeleteOptions) MarshalMsg(b []byte) (o []byte, err error) {
// string "u"
o = append(o, 0xa1, 0x75)
o = msgp.AppendBool(o, z.UndoWrite)
if (zb0001Mask & 0x10) == 0 { // if not omitted
// string "o"
o = append(o, 0xa1, 0x6f)
o = msgp.AppendString(o, z.OldDataDir)
}
return
}
@ -652,6 +702,12 @@ func (z *DeleteOptions) UnmarshalMsg(bts []byte) (o []byte, err error) {
err = msgp.WrapError(err, "UndoWrite")
return
}
case "o":
z.OldDataDir, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "OldDataDir")
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
@ -666,7 +722,7 @@ func (z *DeleteOptions) UnmarshalMsg(bts []byte) (o []byte, err error) {
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *DeleteOptions) Msgsize() (s int) {
s = 1 + 12 + 1 + 2 + msgp.BoolSize + 2 + msgp.BoolSize + 2 + msgp.BoolSize
s = 1 + 12 + 1 + 2 + msgp.BoolSize + 2 + msgp.BoolSize + 2 + msgp.BoolSize + 2 + msgp.StringPrefixSize + len(z.OldDataDir)
return
}
@ -4751,10 +4807,16 @@ func (z *RenameDataResp) DecodeMsg(dc *msgp.Reader) (err error) {
return
}
switch msgp.UnsafeString(field) {
case "sig":
z.Signature, err = dc.ReadUint64()
case "Sign":
z.Sign, err = dc.ReadBytes(z.Sign)
if err != nil {
err = msgp.WrapError(err, "Signature")
err = msgp.WrapError(err, "Sign")
return
}
case "OldDataDir":
z.OldDataDir, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "OldDataDir")
return
}
default:
@ -4769,28 +4831,41 @@ func (z *RenameDataResp) DecodeMsg(dc *msgp.Reader) (err error) {
}
// EncodeMsg implements msgp.Encodable
func (z RenameDataResp) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 1
// write "sig"
err = en.Append(0x81, 0xa3, 0x73, 0x69, 0x67)
func (z *RenameDataResp) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "Sign"
err = en.Append(0x82, 0xa4, 0x53, 0x69, 0x67, 0x6e)
if err != nil {
return
}
err = en.WriteUint64(z.Signature)
err = en.WriteBytes(z.Sign)
if err != nil {
err = msgp.WrapError(err, "Signature")
err = msgp.WrapError(err, "Sign")
return
}
// write "OldDataDir"
err = en.Append(0xaa, 0x4f, 0x6c, 0x64, 0x44, 0x61, 0x74, 0x61, 0x44, 0x69, 0x72)
if err != nil {
return
}
err = en.WriteString(z.OldDataDir)
if err != nil {
err = msgp.WrapError(err, "OldDataDir")
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z RenameDataResp) MarshalMsg(b []byte) (o []byte, err error) {
func (z *RenameDataResp) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 1
// string "sig"
o = append(o, 0x81, 0xa3, 0x73, 0x69, 0x67)
o = msgp.AppendUint64(o, z.Signature)
// map header, size 2
// string "Sign"
o = append(o, 0x82, 0xa4, 0x53, 0x69, 0x67, 0x6e)
o = msgp.AppendBytes(o, z.Sign)
// string "OldDataDir"
o = append(o, 0xaa, 0x4f, 0x6c, 0x64, 0x44, 0x61, 0x74, 0x61, 0x44, 0x69, 0x72)
o = msgp.AppendString(o, z.OldDataDir)
return
}
@ -4812,10 +4887,16 @@ func (z *RenameDataResp) UnmarshalMsg(bts []byte) (o []byte, err error) {
return
}
switch msgp.UnsafeString(field) {
case "sig":
z.Signature, bts, err = msgp.ReadUint64Bytes(bts)
case "Sign":
z.Sign, bts, err = msgp.ReadBytesBytes(bts, z.Sign)
if err != nil {
err = msgp.WrapError(err, "Signature")
err = msgp.WrapError(err, "Sign")
return
}
case "OldDataDir":
z.OldDataDir, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "OldDataDir")
return
}
default:
@ -4831,8 +4912,8 @@ func (z *RenameDataResp) UnmarshalMsg(bts []byte) (o []byte, err error) {
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z RenameDataResp) Msgsize() (s int) {
s = 1 + 4 + msgp.Uint64Size
func (z *RenameDataResp) Msgsize() (s int) {
s = 1 + 5 + msgp.BytesPrefixSize + len(z.Sign) + 11 + msgp.StringPrefixSize + len(z.OldDataDir)
return
}

View File

@ -85,7 +85,7 @@ type StorageAPI interface {
UpdateMetadata(ctx context.Context, volume, path string, fi FileInfo, opts UpdateMetadataOpts) error
ReadVersion(ctx context.Context, origvolume, volume, path, versionID string, opts ReadOptions) (FileInfo, error)
ReadXL(ctx context.Context, volume, path string, readData bool) (RawFileInfo, error)
RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (uint64, error)
RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (RenameDataResp, error)
// File operations.
ListDir(ctx context.Context, origvolume, volume, dirPath string, count int) ([]string, error)

View File

@ -440,7 +440,9 @@ func (client *storageRESTClient) CheckParts(ctx context.Context, volume string,
}
// RenameData - rename source path to destination path atomically, metadata and data file.
func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo,
dstVolume, dstPath string, opts RenameOptions,
) (res RenameDataResp, err error) {
params := RenameDataHandlerParams{
DiskID: *client.diskID.Load(),
SrcVolume: srcVolume,
@ -457,11 +459,11 @@ func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcP
resp, err = storageRenameDataInlineRPC.Call(ctx, client.gridConn, &RenameDataInlineHandlerParams{params})
}
if err != nil {
return 0, toStorageErr(err)
return res, toStorageErr(err)
}
defer storageRenameDataRPC.PutResponse(resp)
return resp.Signature, nil
return *resp, nil
}
// where we keep old *Readers

View File

@ -67,7 +67,7 @@ var (
storageWriteAllRPC = grid.NewSingleHandler[*WriteAllHandlerParams, grid.NoPayload](grid.HandlerWriteAll, func() *WriteAllHandlerParams { return &WriteAllHandlerParams{} }, grid.NewNoPayload)
storageReadVersionRPC = grid.NewSingleHandler[*grid.MSS, *FileInfo](grid.HandlerReadVersion, grid.NewMSS, func() *FileInfo { return &FileInfo{} })
storageReadXLRPC = grid.NewSingleHandler[*grid.MSS, *RawFileInfo](grid.HandlerReadXL, grid.NewMSS, func() *RawFileInfo { return &RawFileInfo{} })
storageRenameDataRPC = grid.NewSingleHandler[*RenameDataHandlerParams, *RenameDataResp](grid.HandlerRenameData, func() *RenameDataHandlerParams { return &RenameDataHandlerParams{} }, func() *RenameDataResp { return &RenameDataResp{} })
storageRenameDataRPC = grid.NewSingleHandler[*RenameDataHandlerParams, *RenameDataResp](grid.HandlerRenameData2, func() *RenameDataHandlerParams { return &RenameDataHandlerParams{} }, func() *RenameDataResp { return &RenameDataResp{} })
storageRenameDataInlineRPC = grid.NewSingleHandler[*RenameDataInlineHandlerParams, *RenameDataResp](grid.HandlerRenameDataInline, newRenameDataInlineHandlerParams, func() *RenameDataResp { return &RenameDataResp{} }).AllowCallRequestPool(false)
storageRenameFileRPC = grid.NewSingleHandler[*RenameFileHandlerParams, grid.NoPayload](grid.HandlerRenameFile, func() *RenameFileHandlerParams { return &RenameFileHandlerParams{} }, grid.NewNoPayload).AllowCallRequestPool(true)
storageStatVolRPC = grid.NewSingleHandler[*grid.MSS, *VolInfo](grid.HandlerStatVol, grid.NewMSS, func() *VolInfo { return &VolInfo{} })
@ -695,10 +695,8 @@ func (s *storageRESTServer) RenameDataHandler(p *RenameDataHandlerParams) (*Rena
return nil, grid.NewRemoteErr(errDiskNotFound)
}
sign, err := s.getStorage().RenameData(context.Background(), p.SrcVolume, p.SrcPath, p.FI, p.DstVolume, p.DstPath, p.Opts)
return &RenameDataResp{
Signature: sign,
}, grid.NewRemoteErr(err)
resp, err := s.getStorage().RenameData(context.Background(), p.SrcVolume, p.SrcPath, p.FI, p.DstVolume, p.DstPath, p.Opts)
return &resp, grid.NewRemoteErr(err)
}
// RenameDataInlineHandler - renames a meta object and data dir to destination.

View File

@ -461,10 +461,10 @@ func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPat
return w.Run(func() error { return p.storage.RenameFile(ctx, srcVolume, srcPath, dstVolume, dstPath) })
}
func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (res RenameDataResp, err error) {
ctx, done, err := p.TrackDiskHealth(ctx, storageMetricRenameData, srcPath, fi.DataDir, dstVolume, dstPath)
if err != nil {
return 0, err
return res, err
}
defer func() {
if err == nil && !skipAccessChecks(dstVolume) {
@ -472,11 +472,12 @@ func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPat
}
done(&err)
}()
// Copy inline data to a new buffer to function with deadlines.
if len(fi.Data) > 0 {
fi.Data = append(grid.GetByteBufferCap(len(fi.Data))[:0], fi.Data...)
}
return xioutil.WithDeadline[uint64](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (result uint64, err error) {
return xioutil.WithDeadline[RenameDataResp](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (res RenameDataResp, err error) {
if len(fi.Data) > 0 {
defer grid.PutByteBuffer(fi.Data)
}

View File

@ -29,6 +29,7 @@ import (
pathutil "path"
"path/filepath"
"runtime"
"slices"
"strconv"
"strings"
"sync"
@ -48,7 +49,6 @@ import (
xioutil "github.com/minio/minio/internal/ioutil"
"github.com/minio/minio/internal/logger"
"github.com/pkg/xattr"
"github.com/zeebo/xxh3"
)
const (
@ -65,6 +65,9 @@ const (
// XL metadata file carries per object metadata.
xlStorageFormatFile = "xl.meta"
// XL metadata file backup file carries previous per object metadata.
xlStorageFormatFileBackup = "xl.meta.bkp"
)
var alignedBuf []byte
@ -1366,6 +1369,10 @@ func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi F
return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
}
if opts.UndoWrite && opts.OldDataDir != "" {
return renameAll(pathJoin(filePath, opts.OldDataDir, xlStorageFormatFileBackup), pathJoin(filePath, xlStorageFormatFile), filePath)
}
return s.deleteFile(volumeDir, pathJoin(volumeDir, path, xlStorageFormatFile), true, false)
}
@ -1375,12 +1382,21 @@ func (s *xlStorage) UpdateMetadata(ctx context.Context, volume, path string, fi
return errInvalidArgument
}
volumeDir, err := s.getVolDir(volume)
if err != nil {
return err
}
// Validate file path length, before reading.
filePath := pathJoin(volumeDir, path)
if err = checkPathLength(filePath); err != nil {
return err
}
buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
if err != nil {
if err == errFileNotFound {
if fi.VersionID != "" {
return errFileVersionNotFound
}
if err == errFileNotFound && fi.VersionID != "" {
return errFileVersionNotFound
}
return err
}
@ -1405,7 +1421,7 @@ func (s *xlStorage) UpdateMetadata(ctx context.Context, volume, path string, fi
}
defer metaDataPoolPut(wbuf)
return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), wbuf, !opts.NoPersistence)
return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), wbuf, !opts.NoPersistence, volumeDir)
}
// WriteMetadata - writes FileInfo metadata for path at `xl.meta`
@ -1438,7 +1454,7 @@ func (s *xlStorage) WriteMetadata(ctx context.Context, origvolume, volume, path
// this is currently used by
// - emphemeral objects such as objects created during listObjects() calls
// - newMultipartUpload() call..
return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf, false)
return s.writeAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf, false, "")
}
buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
@ -1936,14 +1952,17 @@ func (s *xlStorage) openFileDirect(path string, mode int) (f *os.File, err error
return w, nil
}
func (s *xlStorage) openFileSync(filePath string, mode int) (f *os.File, err error) {
return s.openFile(filePath, mode|writeMode)
func (s *xlStorage) openFileSync(filePath string, mode int, skipParent string) (f *os.File, err error) {
return s.openFile(filePath, mode|writeMode, skipParent)
}
func (s *xlStorage) openFile(filePath string, mode int) (f *os.File, err error) {
func (s *xlStorage) openFile(filePath string, mode int, skipParent string) (f *os.File, err error) {
if skipParent == "" {
skipParent = s.drivePath
}
// Create top level directories if they don't exist.
// with mode 0777 mkdir honors system umask.
if err = mkdirAll(pathutil.Dir(filePath), 0o777, s.drivePath); err != nil {
if err = mkdirAll(pathutil.Dir(filePath), 0o777, skipParent); err != nil {
return nil, osErrToFileErr(err)
}
@ -2095,18 +2114,22 @@ func (s *xlStorage) CreateFile(ctx context.Context, origvolume, volume, path str
}
}()
return s.writeAllDirect(ctx, filePath, fileSize, r, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
return s.writeAllDirect(ctx, filePath, fileSize, r, os.O_CREATE|os.O_WRONLY|os.O_EXCL, volumeDir)
}
func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSize int64, r io.Reader, flags int) (err error) {
func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSize int64, r io.Reader, flags int, skipParent string) (err error) {
if contextCanceled(ctx) {
return ctx.Err()
}
if skipParent == "" {
skipParent = s.drivePath
}
// Create top level directories if they don't exist.
// with mode 0777 mkdir honors system umask.
parentFilePath := pathutil.Dir(filePath)
if err = mkdirAll(parentFilePath, 0o777, s.drivePath); err != nil {
if err = mkdirAll(parentFilePath, 0o777, skipParent); err != nil {
return osErrToFileErr(err)
}
@ -2168,7 +2191,7 @@ func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSiz
return w.Close()
}
func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) {
func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool, skipParent string) (err error) {
if contextCanceled(ctx) {
return ctx.Err()
}
@ -2194,11 +2217,11 @@ func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b
// This is an optimization mainly to ensure faster I/O.
if len(b) > xioutil.DirectioAlignSize {
r := bytes.NewReader(b)
return s.writeAllDirect(ctx, filePath, r.Size(), r, flags)
return s.writeAllDirect(ctx, filePath, r.Size(), r, flags, skipParent)
}
w, err = s.openFileSync(filePath, flags)
w, err = s.openFileSync(filePath, flags, skipParent)
} else {
w, err = s.openFile(filePath, flags)
w, err = s.openFile(filePath, flags, skipParent)
}
if err != nil {
return err
@ -2235,7 +2258,12 @@ func (s *xlStorage) WriteAll(ctx context.Context, volume string, path string, b
s.Unlock()
}
return s.writeAll(ctx, volume, path, b, true)
volumeDir, err := s.getVolDir(volume)
if err != nil {
return err
}
return s.writeAll(ctx, volume, path, b, true, volumeDir)
}
// AppendFile - append a byte array at path, if file doesn't exist at
@ -2261,7 +2289,7 @@ func (s *xlStorage) AppendFile(ctx context.Context, volume string, path string,
var w *os.File
// Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
// AppendFile() is only used by healing code to heal objects written in old format.
w, err = s.openFileSync(filePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
w, err = s.openFileSync(filePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, volumeDir)
if err != nil {
return err
}
@ -2425,7 +2453,7 @@ func skipAccessChecks(volume string) (ok bool) {
}
// RenameData - rename source path to destination path atomically, metadata and data directory.
func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (res RenameDataResp, err error) {
defer func() {
ignoredErrs := []error{
errFileNotFound,
@ -2451,24 +2479,24 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
srcVolumeDir, err := s.getVolDir(srcVolume)
if err != nil {
return 0, err
return res, err
}
dstVolumeDir, err := s.getVolDir(dstVolume)
if err != nil {
return 0, err
return res, err
}
if !skipAccessChecks(srcVolume) {
// Stat a volume entry.
if err = Access(srcVolumeDir); err != nil {
return 0, convertAccessError(err, errVolumeAccessDenied)
return res, convertAccessError(err, errVolumeAccessDenied)
}
}
if !skipAccessChecks(dstVolume) {
if err = Access(dstVolumeDir); err != nil {
return 0, convertAccessError(err, errVolumeAccessDenied)
return res, convertAccessError(err, errVolumeAccessDenied)
}
}
@ -2490,13 +2518,17 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
}
if err = checkPathLength(srcFilePath); err != nil {
return 0, err
return res, err
}
if err = checkPathLength(dstFilePath); err != nil {
return 0, err
return res, err
}
s.RLock()
formatLegacy := s.formatLegacy
s.RUnlock()
dstBuf, err := xioutil.ReadFile(dstFilePath)
if err != nil {
// handle situations when dstFilePath is 'file'
@ -2506,20 +2538,22 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
if isSysErrNotDir(err) && runtime.GOOS != globalWindowsOSName {
// NOTE: On windows the error happens at
// next line and returns appropriate error.
return 0, errFileAccessDenied
return res, errFileAccessDenied
}
if !osIsNotExist(err) {
return 0, osErrToFileErr(err)
return res, osErrToFileErr(err)
}
// errFileNotFound comes here.
err = s.renameLegacyMetadata(dstVolumeDir, dstPath)
if err != nil && err != errFileNotFound {
return 0, err
}
if err == nil {
dstBuf, err = xioutil.ReadFile(dstFilePath)
if err != nil && !osIsNotExist(err) {
return 0, osErrToFileErr(err)
if formatLegacy {
// errFileNotFound comes here.
err = s.renameLegacyMetadata(dstVolumeDir, dstPath)
if err != nil && err != errFileNotFound {
return res, err
}
if err == nil {
dstBuf, err = xioutil.ReadFile(dstFilePath)
if err != nil && !osIsNotExist(err) {
return res, osErrToFileErr(err)
}
}
}
}
@ -2548,9 +2582,6 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
}
}
} else {
s.RLock()
formatLegacy := s.formatLegacy
s.RUnlock()
// It is possible that some drives may not have `xl.meta` file
// in such scenarios verify if at least `part.1` files exist
// to verify for legacy version.
@ -2562,7 +2593,7 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
currentDataPath := pathJoin(dstVolumeDir, dstPath)
entries, err := readDirN(currentDataPath, 1)
if err != nil && err != errFileNotFound {
return 0, osErrToFileErr(err)
return res, osErrToFileErr(err)
}
for _, entry := range entries {
if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
@ -2576,64 +2607,58 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
}
}
legacyDataPath := pathJoin(dstVolumeDir, dstPath, legacyDataDir)
if legacyPreserved {
// Preserve all the legacy data, could be slow, but at max there can be 10,000 parts.
currentDataPath := pathJoin(dstVolumeDir, dstPath)
entries, err := readDir(currentDataPath)
if err != nil {
return 0, osErrToFileErr(err)
}
// legacy data dir means its old content, honor system umask.
if err = mkdirAll(legacyDataPath, 0o777, dstVolumeDir); err != nil {
// any failed mkdir-calls delete them.
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
return 0, osErrToFileErr(err)
}
for _, entry := range entries {
// Skip xl.meta renames further, also ignore any directories such as `legacyDataDir`
if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
continue
var legacyDataPath string
if formatLegacy {
legacyDataPath = pathJoin(dstVolumeDir, dstPath, legacyDataDir)
if legacyPreserved {
// Preserve all the legacy data, could be slow, but at max there can be 1res,000 parts.
currentDataPath := pathJoin(dstVolumeDir, dstPath)
entries, err := readDir(currentDataPath)
if err != nil {
return res, osErrToFileErr(err)
}
if err = Rename(pathJoin(currentDataPath, entry), pathJoin(legacyDataPath, entry)); err != nil {
// Any failed rename calls un-roll previous transaction.
// legacy data dir means its old content, honor system umask.
if err = mkdirAll(legacyDataPath, 0o777, dstVolumeDir); err != nil {
// any failed mkdir-calls delete them.
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
return res, osErrToFileErr(err)
}
return 0, osErrToFileErr(err)
for _, entry := range entries {
// Skip xl.meta renames further, also ignore any directories such as `legacyDataDir`
if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
continue
}
if err = Rename(pathJoin(currentDataPath, entry), pathJoin(legacyDataPath, entry)); err != nil {
// Any failed rename calls un-roll previous transaction.
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
return res, osErrToFileErr(err)
}
}
}
}
var oldDstDataPath, reqVID string
// Set skipParent to skip mkdirAll() calls for deeply nested objects
// - if its an overwrite
// - if its a versioned object
//
// This can potentiall reduce syscalls by strings.Split(path, "/")
// times relative to the object name.
skipParent := dstVolumeDir
if len(dstBuf) > 0 {
skipParent = pathutil.Dir(dstFilePath)
}
var reqVID string
if fi.VersionID == "" {
reqVID = nullVersionID
} else {
reqVID = fi.VersionID
}
// Replace the data of null version or any other existing version-id
_, ver, err := xlMeta.findVersionStr(reqVID)
if err == nil {
dataDir := ver.getDataDir()
if dataDir != "" && (xlMeta.SharedDataDirCountStr(reqVID, dataDir) == 0) {
// Purge the destination path as we are not preserving anything
// versioned object was not requested.
oldDstDataPath = pathJoin(dstVolumeDir, dstPath, dataDir)
// if old destination path is same as new destination path
// there is nothing to purge, this is true in case of healing
// avoid setting oldDstDataPath at that point.
if oldDstDataPath == dstDataPath {
oldDstDataPath = ""
} else {
xlMeta.data.remove(reqVID, dataDir)
}
}
}
// Empty fi.VersionID indicates that versioning is either
// suspended or disabled on this bucket. RenameData will replace
// the 'null' version. We add a free-version to track its tiered
@ -2651,74 +2676,115 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
}
// indicates if RenameData() is called by healing.
// healing doesn't preserve the dataDir as 'legacy'
healing := fi.XLV1 && fi.DataDir != legacyDataDir
healing := fi.Healing()
// Replace the data of null version or any other existing version-id
_, ver, err := xlMeta.findVersionStr(reqVID)
if err == nil {
dataDir := ver.getDataDir()
if dataDir != "" && (xlMeta.SharedDataDirCountStr(reqVID, dataDir) == 0) {
// Purge the destination path as we are not preserving anything
// versioned object was not requested.
res.OldDataDir = dataDir
if healing {
// if old destination path is same as new destination path
// there is nothing to purge, this is true in case of healing
// avoid setting OldDataDir at that point.
res.OldDataDir = ""
} else {
xlMeta.data.remove(reqVID, dataDir)
}
}
}
if err = xlMeta.AddVersion(fi); err != nil {
if legacyPreserved {
// Any failed rename calls un-roll previous transaction.
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
}
return 0, err
return res, err
}
var sbuf bytes.Buffer
for _, ver := range xlMeta.versions {
sbuf.Write(ver.header.Signature[:])
if len(xlMeta.versions) <= 10 {
// any number of versions beyond this is excessive
// avoid healing such objects in this manner, let
// it heal during the regular scanner cycle.
dst := []byte{}
for _, ver := range xlMeta.versions {
dst = slices.Grow(dst, 16)
copy(dst[len(dst):], ver.header.VersionID[:])
}
res.Sign = dst
}
sign = xxh3.Hash(sbuf.Bytes())
dstBuf, err = xlMeta.AppendTo(metaDataPoolGet())
defer metaDataPoolPut(dstBuf)
newDstBuf, err := xlMeta.AppendTo(metaDataPoolGet())
defer metaDataPoolPut(newDstBuf)
if err != nil {
if legacyPreserved {
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
}
return 0, errFileCorrupt
return res, errFileCorrupt
}
if err = s.WriteAll(ctx, srcVolume, pathJoin(srcPath, xlStorageFormatFile), dstBuf); err != nil {
if err = s.WriteAll(ctx, srcVolume, pathJoin(srcPath, xlStorageFormatFile), newDstBuf); err != nil {
if legacyPreserved {
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
}
return 0, osErrToFileErr(err)
return res, osErrToFileErr(err)
}
diskHealthCheckOK(ctx, err)
if srcDataPath != "" && len(fi.Data) == 0 && fi.Size > 0 {
// renameAll only for objects that have xl.meta not saved inline.
s.moveToTrash(dstDataPath, true, false)
notInline := srcDataPath != "" && len(fi.Data) == 0 && fi.Size > 0
if notInline {
if healing {
// renameAll only for objects that have xl.meta not saved inline.
// this must be done in healing only, otherwise it is expected
// that for fresh PutObject() call dstDataPath can never exist.
// if its an overwrite then the caller deletes the DataDir
// in a separate RPC call.
s.moveToTrash(dstDataPath, true, false)
// If we are healing we should purge any legacyDataPath content,
// that was previously preserved during PutObject() call
// on a versioned bucket.
s.moveToTrash(legacyDataPath, true, false)
}
if err = renameAll(srcDataPath, dstDataPath, dstVolumeDir); err != nil {
if err = renameAll(srcDataPath, dstDataPath, skipParent); err != nil {
if legacyPreserved {
// Any failed rename calls un-roll previous transaction.
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
}
// if its a partial rename() do not attempt to delete recursively.
s.deleteFile(dstVolumeDir, dstDataPath, false, false)
return 0, osErrToFileErr(err)
return res, osErrToFileErr(err)
}
}
// When we are not inlined and there is no oldDataDir present
// we backup existing xl.meta -> xl.meta.bkp - this is done to
// ensure for some reason we didn't get enough quorum we can
// revert this back to original xl.meta and preserve the older dataDir.
if notInline && res.OldDataDir != "" {
// preserve current xl.meta inside the oldDataDir.
if err = s.writeAll(ctx, dstVolume, pathJoin(dstPath, res.OldDataDir, xlStorageFormatFileBackup), dstBuf, true, skipParent); err != nil {
if legacyPreserved {
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
}
return res, osErrToFileErr(err)
}
diskHealthCheckOK(ctx, err)
}
// Commit meta-file
if err = renameAll(srcFilePath, dstFilePath, dstVolumeDir); err != nil {
if err = renameAll(srcFilePath, dstFilePath, skipParent); err != nil {
if legacyPreserved {
// Any failed rename calls un-roll previous transaction.
s.deleteFile(dstVolumeDir, legacyDataPath, true, false)
}
// if its a partial rename() do not attempt to delete recursively.
// this can be healed since all parts are available.
s.deleteFile(dstVolumeDir, dstDataPath, false, false)
return 0, osErrToFileErr(err)
}
// additionally only purge older data at the end of the transaction of new data-dir
// movement, this is to ensure that previous data references can co-exist for
// any recoverability.
if oldDstDataPath != "" {
s.moveToTrash(oldDstDataPath, true, false)
return res, osErrToFileErr(err)
}
if srcVolume != minioMetaMultipartBucket {
@ -2729,7 +2795,7 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
} else {
s.deleteFile(srcVolumeDir, pathutil.Dir(srcFilePath), true, false)
}
return sign, nil
return res, nil
}
// RenameFile - rename source path to destination path atomically.
@ -3047,8 +3113,11 @@ func (s *xlStorage) CleanAbandonedData(ctx context.Context, volume string, path
// Do not abort on context errors.
for dir := range foundDirs {
toRemove := pathJoin(volumeDir, path, dir+SlashSeparator)
err := s.deleteFile(volumeDir, toRemove, true, true)
err = s.deleteFile(volumeDir, toRemove, true, true)
diskHealthCheckOK(ctx, err)
if err != nil {
return err
}
}
// Do the same for inline data
@ -3056,32 +3125,38 @@ func (s *xlStorage) CleanAbandonedData(ctx context.Context, volume string, path
if err != nil {
return err
}
// Clear and repopulate
for k := range foundDirs {
delete(foundDirs, k)
}
// Populate into map
for _, k := range dirs {
foundDirs[k] = struct{}{}
}
// Delete all directories we expect to be there.
for _, dir := range wantDirs {
delete(foundDirs, dir)
}
// Nothing to delete
if len(foundDirs) == 0 {
return nil
}
// Delete excessive inline entries.
if len(foundDirs) > 0 {
// Convert to slice.
dirs = dirs[:0]
for dir := range foundDirs {
dirs = append(dirs, dir)
}
if xl.data.remove(dirs...) {
newBuf, err := xl.AppendTo(metaDataPoolGet())
if err == nil {
defer metaDataPoolPut(newBuf)
return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
}
// Convert to slice.
dirs = dirs[:0]
for dir := range foundDirs {
dirs = append(dirs, dir)
}
if xl.data.remove(dirs...) {
newBuf, err := xl.AppendTo(metaDataPoolGet())
if err == nil {
defer metaDataPoolPut(newBuf)
return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
}
}
return nil

View File

@ -43,10 +43,7 @@ unset MINIO_KMS_KES_KEY_FILE
unset MINIO_KMS_KES_ENDPOINT
unset MINIO_KMS_KES_KEY_NAME
(
cd ./docs/debugging/s3-check-md5
go install -v
)
go install -v github.com/minio/minio/docs/debugging/s3-check-md5@latest
wget -q -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc &&
chmod +x mc

View File

@ -147,10 +147,7 @@ if [ $ret -ne 0 ]; then
exit 1
fi
(
cd ./docs/debugging/s3-check-md5
go install -v
)
go install -v github.com/minio/minio/docs/debugging/s3-check-md5@latest
s3-check-md5 -versions -access-key minioadmin -secret-key minioadmin -endpoint http://127.0.0.1:9001/ -bucket versioned

View File

@ -158,10 +158,7 @@ if [ $ret -ne 0 ]; then
exit 1
fi
(
cd ./docs/debugging/s3-check-md5
go install -v
)
go install -v github.com/minio/minio/docs/debugging/s3-check-md5@latest
s3-check-md5 -versions -access-key minioadmin -secret-key minioadmin -endpoint http://127.0.0.1:9001/ -bucket versioned

View File

@ -144,10 +144,7 @@ if [ "${expected_checksum}" != "${got_checksum}" ]; then
exit 1
fi
(
cd ./docs/debugging/s3-check-md5
go install -v
)
go install -v github.com/minio/minio/docs/debugging/s3-check-md5@latest
s3-check-md5 -versions -access-key minioadmin -secret-key minioadmin -endpoint http://127.0.0.1:9001/ -bucket versioned

View File

@ -210,10 +210,7 @@ if [ "${expected_checksum}" != "${got_checksum}" ]; then
exit 1
fi
(
cd ./docs/debugging/s3-check-md5
go install -v
)
go install -v github.com/minio/minio/docs/debugging/s3-check-md5@latest
s3-check-md5 -versions -access-key minioadmin -secret-key minioadmin -endpoint http://127.0.0.1:9001/ -bucket bucket2
s3-check-md5 -versions -access-key minioadmin -secret-key minioadmin -endpoint http://127.0.0.1:9001/ -bucket versioned

View File

@ -111,6 +111,7 @@ const (
HandlerWriteAll
HandlerListBuckets
HandlerRenameDataInline
HandlerRenameData2
// Add more above here ^^^
// If all handlers are used, the type of Handler can be changed.
@ -189,6 +190,8 @@ var handlerPrefixes = [handlerLast]string{
HandlerConsoleLog: peerPrefix,
HandlerListDir: storagePrefix,
HandlerListBuckets: peerPrefixS3,
HandlerRenameDataInline: storagePrefix,
HandlerRenameData2: storagePrefix,
}
const (

View File

@ -81,14 +81,15 @@ func _() {
_ = x[HandlerWriteAll-70]
_ = x[HandlerListBuckets-71]
_ = x[HandlerRenameDataInline-72]
_ = x[handlerTest-73]
_ = x[handlerTest2-74]
_ = x[handlerLast-75]
_ = x[HandlerRenameData2-73]
_ = x[handlerTest-74]
_ = x[handlerTest2-75]
_ = x[handlerLast-76]
}
const _HandlerID_name = "handlerInvalidLockLockLockRLockLockUnlockLockRUnlockLockRefreshLockForceUnlockWalkDirStatVolDiskInfoNSScannerReadXLReadVersionDeleteFileDeleteVersionUpdateMetadataWriteMetadataCheckPartsRenameDataRenameFileReadAllServerVerifyTraceListenDeleteBucketMetadataLoadBucketMetadataReloadSiteReplicationConfigReloadPoolMetaStopRebalanceLoadRebalanceMetaLoadTransitionTierConfigDeletePolicyLoadPolicyLoadPolicyMappingDeleteServiceAccountLoadServiceAccountDeleteUserLoadUserLoadGroupHealBucketMakeBucketHeadBucketDeleteBucketGetMetricsGetResourceMetricsGetMemInfoGetProcInfoGetOSInfoGetPartitionsGetNetInfoGetCPUsServerInfoGetSysConfigGetSysServicesGetSysErrorsGetAllBucketStatsGetBucketStatsGetSRMetricsGetPeerMetricsGetMetacacheListingUpdateMetacacheListingGetPeerBucketMetricsStorageInfoConsoleLogListDirGetLocksBackgroundHealStatusGetLastDayTierStatsSignalServiceGetBandwidthWriteAllListBucketsRenameDataInlinehandlerTesthandlerTest2handlerLast"
const _HandlerID_name = "handlerInvalidLockLockLockRLockLockUnlockLockRUnlockLockRefreshLockForceUnlockWalkDirStatVolDiskInfoNSScannerReadXLReadVersionDeleteFileDeleteVersionUpdateMetadataWriteMetadataCheckPartsRenameDataRenameFileReadAllServerVerifyTraceListenDeleteBucketMetadataLoadBucketMetadataReloadSiteReplicationConfigReloadPoolMetaStopRebalanceLoadRebalanceMetaLoadTransitionTierConfigDeletePolicyLoadPolicyLoadPolicyMappingDeleteServiceAccountLoadServiceAccountDeleteUserLoadUserLoadGroupHealBucketMakeBucketHeadBucketDeleteBucketGetMetricsGetResourceMetricsGetMemInfoGetProcInfoGetOSInfoGetPartitionsGetNetInfoGetCPUsServerInfoGetSysConfigGetSysServicesGetSysErrorsGetAllBucketStatsGetBucketStatsGetSRMetricsGetPeerMetricsGetMetacacheListingUpdateMetacacheListingGetPeerBucketMetricsStorageInfoConsoleLogListDirGetLocksBackgroundHealStatusGetLastDayTierStatsSignalServiceGetBandwidthWriteAllListBucketsRenameDataInlineRenameData2handlerTesthandlerTest2handlerLast"
var _HandlerID_index = [...]uint16{0, 14, 22, 31, 41, 52, 63, 78, 85, 92, 100, 109, 115, 126, 136, 149, 163, 176, 186, 196, 206, 213, 225, 230, 236, 256, 274, 301, 315, 328, 345, 369, 381, 391, 408, 428, 446, 456, 464, 473, 483, 493, 503, 515, 525, 543, 553, 564, 573, 586, 596, 603, 613, 625, 639, 651, 668, 682, 694, 708, 727, 749, 769, 780, 790, 797, 805, 825, 844, 857, 869, 877, 888, 904, 915, 927, 938}
var _HandlerID_index = [...]uint16{0, 14, 22, 31, 41, 52, 63, 78, 85, 92, 100, 109, 115, 126, 136, 149, 163, 176, 186, 196, 206, 213, 225, 230, 236, 256, 274, 301, 315, 328, 345, 369, 381, 391, 408, 428, 446, 456, 464, 473, 483, 493, 503, 515, 525, 543, 553, 564, 573, 586, 596, 603, 613, 625, 639, 651, 668, 682, 694, 708, 727, 749, 769, 780, 790, 797, 805, 825, 844, 857, 869, 877, 888, 904, 915, 926, 938, 949}
func (i HandlerID) String() string {
if i >= HandlerID(len(_HandlerID_index)-1) {