Wait three minutes after startup to restart decommissioning (#19645)

Typically not all drives are connected right after startup, so we delay 3 minutes before resuming.
This greatly reduces the risk of starting to list drives that are not yet connected, or drives that are at risk of being disconnected soon.

This delay is not applied when starting with an admin call.
This commit is contained in:
Klaus Post 2024-05-01 08:18:21 -07:00 committed by GitHub
parent 08ff702434
commit dbfb5e797b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 11 additions and 11 deletions

View File

@ -535,6 +535,10 @@ func (z *erasureServerPools) Init(ctx context.Context) error {
if len(poolIndices) > 0 && globalEndpoints[poolIndices[0]].Endpoints[0].IsLocal {
go func() {
// Resume decommissioning of pools, but wait 3 minutes for cluster to stabilize.
if err := sleepContext(ctx, 3*time.Minute); err != nil {
return
}
r := rand.New(rand.NewSource(time.Now().UnixNano()))
for {
if err := z.Decommission(ctx, poolIndices...); err != nil {

View File

@ -1127,16 +1127,12 @@ func ptr[T any](a T) *T {
return &a
}
func max(a, b int) int {
if a > b {
return a
// sleepContext sleeps for d duration or until ctx is done.
func sleepContext(ctx context.Context, d time.Duration) error {
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(d):
}
return b
}
func min(a, b int) int {
if a < b {
return a
}
return b
return nil
}