Add cluster audit metrics in metrics-v3 (#19514)

endpoint: /minio/metrics/v3/cluster/audit
metrics:
- failed_messages (counter)
- total_messages (counter)
- target_queue_length (gauge)
This commit is contained in:
Shireesh Anjal 2024-04-17 14:48:02 +05:30 committed by GitHub
parent 6df76ca73c
commit ca5fab8656
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 75 additions and 0 deletions

View File

@ -0,0 +1,57 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"context"
"github.com/minio/minio/internal/logger"
)
// Names of the metrics exposed by the cluster audit metrics group, plus the
// label key attached to every one of them.
const (
	// Metric names, listed in the order they are reported per target.
	auditFailedMessages    = "failed_messages"
	auditTargetQueueLength = "target_queue_length"
	auditTotalMessages     = "total_messages"

	// targetID is the label key whose value identifies the audit target
	// a sample belongs to.
	targetID = "target_id"
)
// Metric descriptors for the cluster audit metrics group. Every descriptor
// declares the target_id label, so each metric is reported per audit target.
var (
	// Counter: messages that could not be sent.
	auditFailedMessagesMD = NewCounterMD(
		auditFailedMessages,
		"Total number of messages that failed to send since start",
		targetID,
	)

	// Gauge: messages currently waiting in the target's queue.
	auditTargetQueueLengthMD = NewGaugeMD(
		auditTargetQueueLength,
		"Number of unsent messages in queue for target",
		targetID,
	)

	// Counter: messages sent.
	auditTotalMessagesMD = NewCounterMD(
		auditTotalMessages,
		"Total number of messages sent since start",
		targetID,
	)
)
// loadClusterAuditMetrics - `MetricsLoaderFn` for the cluster audit metrics.
//
// It takes the current per-target statistics from logger.CurrentStats() and,
// for every target, records the failed message count, the queue length and
// the total message count in m, each labelled with the target's ID.
//
// The context and the metrics cache c are not used. The function always
// returns nil.
func loadClusterAuditMetrics(_ context.Context, m MetricValues, c *metricsCache) error {
	for target, stats := range logger.CurrentStats() {
		// Label key/value pair identifying the audit target; shared by the
		// three samples recorded for it.
		targetLabels := []string{targetID, target}

		m.Set(auditFailedMessages, float64(stats.FailedMessages), targetLabels...)
		m.Set(auditTargetQueueLength, float64(stats.QueueLength), targetLabels...)
		m.Set(auditTotalMessages, float64(stats.TotalMessages), targetLabels...)
	}

	return nil
}

View File

@ -43,6 +43,7 @@ const (
clusterUsageObjectsCollectorPath collectorPath = "/cluster/usage/objects"
clusterUsageBucketsCollectorPath collectorPath = "/cluster/usage/buckets"
clusterErasureSetCollectorPath collectorPath = "/cluster/erasure-set"
clusterAuditCollectorPath collectorPath = "/cluster/audit"
)
const (
@ -218,6 +219,15 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
loadClusterErasureSetMetrics,
)
clusterAuditMG := NewMetricsGroup(clusterAuditCollectorPath,
[]MetricDescriptor{
auditFailedMessagesMD,
auditTargetQueueLengthMD,
auditTotalMessagesMD,
},
loadClusterAuditMetrics,
)
allMetricGroups := []*MetricsGroup{
apiRequestsMG,
apiBucketMG,
@ -230,6 +240,7 @@ func newMetricGroups(r *prometheus.Registry) *metricsV3Collection {
clusterUsageObjectsMG,
clusterUsageBucketsMG,
clusterErasureSetMG,
clusterAuditMG,
}
// Bucket metrics are special, they always include the bucket label. These

View File

@ -164,6 +164,13 @@ The standard metrics groups for ProcessCollector and GoCollector are not shown b
| `minio_cluster_health_capacity_usable_total_bytes` | `gauge` | Total cluster usable storage capacity in bytes | |
| `minio_cluster_health_capacity_usable_free_bytes` | `gauge` | Total cluster usable storage free in bytes | |
### `/cluster/audit`
| Name | Type | Help | Labels |
|-------------------------------------------|-----------|----------------------------------------------------------|-------------|
| `minio_cluster_audit_failed_messages` | `counter` | Total number of messages that failed to send since start | `target_id` |
| `minio_cluster_audit_target_queue_length` | `gauge` | Number of unsent messages in queue for target | `target_id` |
| `minio_cluster_audit_total_messages` | `counter` | Total number of messages sent since start | `target_id` |
### `/cluster/usage/objects`