change default lock retry interval to 50ms (#15560)

Competing mutating calls on the same object in a versioned bucket
may unexpectedly see higher delays.

This can be reproduced with a replicated bucket by repeatedly
overwriting and deleting the same object.

For longer-held locks, such as the scanner's leader lock, keep the 1-second interval.
This commit is contained in:
Harshavardhana 2022-08-19 16:21:05 -07:00 committed by GitHub
parent a2e037f0ec
commit ae4ee95d25
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 49 additions and 18 deletions

View File

@ -1059,13 +1059,15 @@ func replicateObjectToTarget(ctx context.Context, ri ReplicateObjectInfo, object
VersionSuspended: versionSuspended,
})
if err != nil {
sendEvent(eventArgs{
EventName: event.ObjectReplicationNotTracked,
BucketName: bucket,
Object: objInfo,
Host: "Internal: [Replication]",
})
logger.LogIf(ctx, fmt.Errorf("Unable to update replicate for %s/%s(%s): %w", bucket, object, objInfo.VersionID, err))
if !isErrObjectNotFound(err) {
sendEvent(eventArgs{
EventName: event.ObjectReplicationNotTracked,
BucketName: bucket,
Object: objInfo,
Host: "Internal: [Replication]",
})
logger.LogIf(ctx, fmt.Errorf("Unable to update replicate metadata for %s/%s(%s): %w", bucket, object, objInfo.VersionID, err))
}
return
}
defer func() {

View File

@ -64,7 +64,11 @@ const (
var (
globalHealConfig heal.Config
dataScannerLeaderLockTimeout = newDynamicTimeout(30*time.Second, 10*time.Second)
dataScannerLeaderLockTimeout = newDynamicTimeoutWithOpts(dynamicTimeoutOpts{
timeout: 30 * time.Second,
minimum: 10 * time.Second,
retryInterval: time.Second,
})
// Sleeper values are updated when config is loaded.
scannerSleeper = newDynamicSleeper(10, 10*time.Second, true)
scannerCycle = uatomic.NewDuration(dataScannerStartDelay)

View File

@ -34,11 +34,24 @@ const (
// timeouts that are dynamically adapted based on actual usage results
type dynamicTimeout struct {
timeout int64
minimum int64
entries int64
log [dynamicTimeoutLogSize]time.Duration
mutex sync.Mutex
timeout int64
minimum int64
entries int64
log [dynamicTimeoutLogSize]time.Duration
mutex sync.Mutex
retryInterval time.Duration
}
// dynamicTimeoutOpts carries the initialization parameters for
// newDynamicTimeoutWithOpts, extending the plain (timeout, minimum)
// pair with a per-timeout lock retry interval.
type dynamicTimeoutOpts struct {
	timeout       time.Duration // initial adaptive timeout value
	minimum       time.Duration // floor the adaptive timeout may never drop below
	retryInterval time.Duration // sleep between failed lock-acquisition attempts (0 = use dsync default)
}
// newDynamicTimeoutWithOpts returns a new dynamic timeout built from opts.
// It delegates to newDynamicTimeout for the adaptive timeout/minimum setup,
// then records the custom retry interval used while waiting for locks.
func newDynamicTimeoutWithOpts(opts dynamicTimeoutOpts) *dynamicTimeout {
	dt := newDynamicTimeout(opts.timeout, opts.minimum)
	dt.retryInterval = opts.retryInterval
	return dt
}
// newDynamicTimeout returns a new dynamic timeout initialized with timeout value
@ -57,6 +70,10 @@ func (dt *dynamicTimeout) Timeout() time.Duration {
return time.Duration(atomic.LoadInt64(&dt.timeout))
}
// RetryInterval returns the configured sleep interval between failed
// lock-acquisition attempts. A zero value means the caller should fall
// back to the dsync package default.
func (dt *dynamicTimeout) RetryInterval() time.Duration {
	return dt.retryInterval
}
// LogSuccess logs the duration of a successful action that
// did not hit the timeout
func (dt *dynamicTimeout) LogSuccess(duration time.Duration) {

View File

@ -166,7 +166,8 @@ func (di *distLockInstance) GetLock(ctx context.Context, timeout *dynamicTimeout
newCtx, cancel := context.WithCancel(ctx)
if !di.rwMutex.GetLock(newCtx, cancel, di.opsID, lockSource, dsync.Options{
Timeout: timeout.Timeout(),
Timeout: timeout.Timeout(),
RetryInterval: timeout.RetryInterval(),
}) {
timeout.LogFailure()
cancel()
@ -195,7 +196,8 @@ func (di *distLockInstance) GetRLock(ctx context.Context, timeout *dynamicTimeou
newCtx, cancel := context.WithCancel(ctx)
if !di.rwMutex.GetRLock(ctx, cancel, di.opsID, lockSource, dsync.Options{
Timeout: timeout.Timeout(),
Timeout: timeout.Timeout(),
RetryInterval: timeout.RetryInterval(),
}) {
timeout.LogFailure()
cancel()

View File

@ -59,7 +59,8 @@ const (
// dRWMutexRefreshInterval - default the interval between two refresh calls
drwMutexRefreshInterval = 10 * time.Second
lockRetryInterval = 1 * time.Second
// maximum time to sleep before retrying a failed blocking lock()
lockRetryInterval = 50 * time.Millisecond
drwMutexInfinite = 1<<63 - 1
)
@ -142,7 +143,8 @@ func (dm *DRWMutex) Lock(id, source string) {
// Options lock options.
type Options struct {
Timeout time.Duration
Timeout time.Duration
RetryInterval time.Duration
}
// GetLock tries to get a write lock on dm before the timeout elapses.
@ -236,7 +238,11 @@ func (dm *DRWMutex) lockBlocking(ctx context.Context, lockLossCallback func(), i
return locked
}
time.Sleep(time.Duration(dm.rng.Float64() * float64(dm.lockRetryInterval)))
lockRetryInterval := dm.lockRetryInterval
if opts.RetryInterval > 0 {
lockRetryInterval = opts.RetryInterval
}
time.Sleep(time.Duration(dm.rng.Float64() * float64(lockRetryInterval)))
}
}
}