Export tier metrics (#18678)

minio_node_tier_ttlb_seconds_distribution - Distribution of time to last byte for streaming objects from warm tier
minio_node_tier_requests_success - Number of requests to download object from warm tier that were successful
minio_node_tier_requests_failure - Number of requests to download object from warm tier that failed
This commit is contained in:
Krishnan Parthasarathi 2023-12-20 20:13:40 -08:00 committed by GitHub
parent b1a109a611
commit 56b7045c20
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 174 additions and 2 deletions

View File

@ -507,9 +507,13 @@ func auditTierActions(ctx context.Context, tier string, bytes int64) func(err er
}
if err == nil {
op.TimeToResponseNS = time.Since(startTime).Nanoseconds()
since := time.Since(startTime)
op.TimeToResponseNS = since.Nanoseconds()
globalTierMetrics.Observe(tier, since)
globalTierMetrics.logSuccess(tier)
} else {
op.Error = err.Error()
globalTierMetrics.logFailure(tier)
}
logger.GetReqInfo(ctx).AppendTags("tierStats", op)

View File

@ -90,6 +90,7 @@ func init() {
getNetworkMetrics(),
getMinioVersionMetrics(),
getS3TTFBMetric(),
getTierMetrics(),
getNotificationMetrics(),
getDistLockMetrics(),
getIAMNodeMetrics(),
@ -155,6 +156,7 @@ const (
usageSubsystem MetricSubsystem = "usage"
quotaSubsystem MetricSubsystem = "quota"
ilmSubsystem MetricSubsystem = "ilm"
tierSubsystem MetricSubsystem = "tier"
scannerSubsystem MetricSubsystem = "scanner"
iamSubsystem MetricSubsystem = "iam"
kmsSubsystem MetricSubsystem = "kms"
@ -246,6 +248,7 @@ const (
sizeDistribution = "size_distribution"
versionDistribution = "version_distribution"
ttfbDistribution = "seconds_distribution"
ttlbDistribution = "ttlb_seconds_distribution"
lastActivityTime = "last_activity_nano_seconds"
startTime = "starttime_seconds"
@ -262,6 +265,9 @@ const (
transitionedObjects MetricName = "transitioned_objects"
transitionedVersions MetricName = "transitioned_versions"
tierRequestsSuccess MetricName = "requests_success"
tierRequestsFailure MetricName = "requests_failure"
kmsOnline = "online"
kmsRequestsSuccess = "request_success"
kmsRequestsError = "request_error"
@ -1658,6 +1664,16 @@ func getS3TTFBMetric() *MetricsGroup {
return mg
}
// getTierMetrics builds the metrics group for the tier metrics. The group is
// created with a 10 second cacheInterval and a single registered reader that
// returns whatever globalTierMetrics.Report() produces.
func getTierMetrics() *MetricsGroup {
	group := &MetricsGroup{cacheInterval: 10 * time.Second}
	group.RegisterRead(func(context.Context) []Metric {
		return globalTierMetrics.Report()
	})
	return group
}
func getTransitionPendingTasksMD() MetricDescription {
return MetricDescription{
Namespace: nodeMetricNamespace,

View File

@ -1,4 +1,4 @@
// Copyright (c) 2015-2021 MinIO, Inc.
// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
@ -27,11 +27,13 @@ import (
"path"
"strings"
"sync"
"time"
"github.com/minio/madmin-go/v3"
"github.com/minio/minio/internal/crypto"
"github.com/minio/minio/internal/hash"
"github.com/minio/minio/internal/kms"
"github.com/prometheus/client_golang/prometheus"
)
//go:generate msgp -file $GOFILE
@ -80,6 +82,96 @@ type TierConfigMgr struct {
Tiers map[string]madmin.TierConfig `json:"tiers"`
}
// tierMetrics holds per-tier request statistics: success/failure counters
// keyed by tier name, and a histogram of request durations labelled by tier.
type tierMetrics struct {
sync.RWMutex // protects requestsCount only
// requestsCount maps a tier name to its counters; success is incremented by
// logSuccess and failure by logFailure.
requestsCount map[string]struct {
success int64
failure int64
}
// histogram receives durations in seconds via Observe, one series per
// "tier" label value.
histogram *prometheus.HistogramVec
}
// globalTierMetrics is the package-level tierMetrics instance. It starts with
// an empty counter map and a histogram vector keyed by the "tier" label.
var globalTierMetrics = tierMetrics{
requestsCount: make(map[string]struct {
success int64
failure int64
}),
histogram: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "tier_ttlb_seconds",
Help: "Time taken by requests served by warm tier",
// Bucket upper bounds. Observe feeds this histogram with dur.Seconds(), so
// the bounds range from 10ms up to 30 minutes.
Buckets: []float64{0.01, 0.1, 1, 2, 5, 10, 60, 5 * 60, 15 * 60, 30 * 60},
}, []string{"tier"}),
}
// Observe records dur, converted to seconds, in the histogram series labelled
// with the given tier name.
func (t *tierMetrics) Observe(tier string, dur time.Duration) {
	series := t.histogram.With(prometheus.Labels{"tier": tier})
	series.Observe(dur.Seconds())
}
// logSuccess adds one to the success counter kept for tier, holding the write
// lock for the duration of the update.
func (t *tierMetrics) logSuccess(tier string) {
	t.Lock()
	defer t.Unlock()

	// Map entries are not addressable: copy the entry out, bump it, and
	// store it back. A tier seen for the first time starts from zero.
	counts := t.requestsCount[tier]
	counts.success++
	t.requestsCount[tier] = counts
}
// logFailure adds one to the failure counter kept for tier, holding the write
// lock for the duration of the update.
func (t *tierMetrics) logFailure(tier string) {
	t.Lock()
	defer t.Unlock()

	// Map entries are not addressable: copy the entry out, bump it, and
	// store it back. A tier seen for the first time starts from zero.
	counts := t.requestsCount[tier]
	counts.failure++
	t.requestsCount[tier] = counts
}
// Descriptions of the tier metrics emitted by (*tierMetrics).Report. All three
// share nodeMetricNamespace and tierSubsystem and differ only in name, help
// text and type.
var (
// tierTTLBMD describes the time-to-last-byte distribution; Report passes it
// to getHistogramMetrics together with the tier histogram.
// {minio_node}_{tier}_{ttlb_seconds_distribution}
// NOTE(review): typed as gaugeMetric although it describes a distribution;
// confirm this matches how other distribution metrics are declared.
tierTTLBMD = MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: tierSubsystem,
Name: ttlbDistribution,
Help: "Distribution of time to last byte for objects downloaded from warm tier",
Type: gaugeMetric,
}
// tierRequestsSuccessMD describes the per-tier count of successful requests.
// {minio_node}_{tier}_{requests_success}
tierRequestsSuccessMD = MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: tierSubsystem,
Name: tierRequestsSuccess,
Help: "Number of requests to download object from warm tier that were successful",
Type: counterMetric,
}
// tierRequestsFailureMD describes the per-tier count of failed requests.
// {minio_node}_{tier}_{requests_failure}
tierRequestsFailureMD = MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: tierSubsystem,
Name: tierRequestsFailure,
Help: "Number of requests to download object from warm tier that failed",
Type: counterMetric,
}
)
// Report returns the metrics produced by getHistogramMetrics for the tier
// histogram, followed by a success metric and a failure metric for every tier
// present in requestsCount. Each counter metric carries a "tier" label. Tiers
// are visited in map iteration order, which Go leaves unspecified.
func (t *tierMetrics) Report() []Metric {
	out := getHistogramMetrics(t.histogram, tierTTLBMD)

	// The read lock only needs to cover the walk over the counters map.
	t.RLock()
	defer t.RUnlock()

	for name, counts := range t.requestsCount {
		out = append(out,
			Metric{
				Description:    tierRequestsSuccessMD,
				Value:          float64(counts.success),
				VariableLabels: map[string]string{"tier": name},
			},
			Metric{
				Description:    tierRequestsFailureMD,
				Value:          float64(counts.failure),
				VariableLabels: map[string]string{"tier": name},
			},
		)
	}
	return out
}
// IsTierValid returns true if there exists a remote tier by name tierName,
// otherwise returns false.
func (config *TierConfigMgr) IsTierValid(tierName string) bool {

52
cmd/tier_test.go Normal file
View File

@ -0,0 +1,52 @@
// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"testing"
"time"
)
// TestTierMetrics records one duration observation plus a known number of
// successes and failures against globalTierMetrics, then checks that the
// success and failure values summed over the Report output match those counts.
func TestTierMetrics(t *testing.T) {
	const (
		tierName    = "WARM-1"
		wantSuccess = 10
		wantFailure = 5
	)

	globalTierMetrics.Observe(tierName, 200*time.Millisecond)
	for n := wantSuccess; n > 0; n-- {
		globalTierMetrics.logSuccess(tierName)
	}
	for n := wantFailure; n > 0; n-- {
		globalTierMetrics.logFailure(tierName)
	}

	// Sum the counters by metric name; histogram entries are ignored.
	var gotSuccess, gotFailure float64
	for _, m := range globalTierMetrics.Report() {
		if m.Description.Name == tierRequestsSuccess {
			gotSuccess += m.Value
		} else if m.Description.Name == tierRequestsFailure {
			gotFailure += m.Value
		}
	}

	if int(gotSuccess) != wantSuccess {
		t.Fatalf("Expected %d successes but got %f", wantSuccess, gotSuccess)
	}
	if int(gotFailure) != wantFailure {
		t.Fatalf("Expected %d failures but got %f", wantFailure, gotFailure)
	}
}

View File

@ -200,6 +200,14 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
| `minio_node_ilm_transition_missed_immediate_tasks` | Number of missed immediate ILM transition tasks. |
| `minio_node_ilm_versions_scanned` | Total number of object versions checked for ilm actions since server start. |
## Tier Metrics
| Name | Description |
|:---------------------------------------------------|:----------------------------------------------------------------------------|
| `minio_node_tier_ttlb_seconds_distribution` | Distribution of time to last byte for objects downloaded from warm tier |
| `minio_node_tier_requests_success` | Number of requests to download object from warm tier that were successful |
| `minio_node_tier_requests_failure` | Number of requests to download object from warm tier that failed |
## System Metrics
| Name | Description |