Read drive IO stats from sysfs instead of procfs (#19131)

Currently, we read from `/proc/diskstats`, which has been found to be
unreliable in k8s environments. We can read from `sysfs` instead.

Also, cache the latest drive IO stats so that the difference from the
previous reading can be computed and used to update the metrics.
This commit is contained in:
Praveen raj Mani 2024-02-27 01:04:50 +05:30 committed by GitHub
parent 2b5e4b853c
commit 30c2596512
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 243 additions and 192 deletions

View File

@ -130,12 +130,6 @@ func collectLocalDisksMetrics(disks map[string]struct{}) map[string]madmin.DiskM
}
metrics := make(map[string]madmin.DiskMetric)
procStats, procErr := disk.GetAllDrivesIOStats()
if procErr != nil {
return metrics
}
storageInfo := objLayer.LocalStorageInfo(GlobalContext, true)
for _, d := range storageInfo.Disks {
if len(disks) != 0 {
@ -170,9 +164,8 @@ func collectLocalDisksMetrics(disks map[string]struct{}) map[string]madmin.DiskM
}
}
// get disk
if procErr == nil {
st := procStats[disk.DevID{Major: d.Major, Minor: d.Minor}]
st, err := disk.GetDriveStats(d.Major, d.Minor)
if err == nil {
dm.IOStats = madmin.DiskIOStats{
ReadIOs: st.ReadIOs,
ReadMerges: st.ReadMerges,

View File

@ -27,7 +27,6 @@ import (
"github.com/minio/madmin-go/v3"
"github.com/prometheus/client_golang/prometheus"
"github.com/shirou/gopsutil/v3/host"
)
const (
@ -85,9 +84,9 @@ var (
resourceMetricsGroups []*MetricsGroup
// initial values for drives (at the time of server startup)
// used for calculating avg values for drive metrics
initialDriveStats map[string]madmin.DiskIOStats
initialDriveStatsMu sync.RWMutex
initialUptime uint64
latestDriveStats map[string]madmin.DiskIOStats
latestDriveStatsMu sync.RWMutex
lastDriveStatsRefresh time.Time
)
// PeerResourceMetrics represents the resource metrics
@ -147,7 +146,7 @@ func init() {
writesKBPerSec: "Kilobytes written per second on a drive",
readsAwait: "Average time for read requests to be served on a drive",
writesAwait: "Average time for write requests to be served on a drive",
percUtil: "Percentage of time the disk was busy since uptime",
percUtil: "Percentage of time the disk was busy",
usedBytes: "Used bytes on a drive",
totalBytes: "Total bytes on a drive",
usedInodes: "Total inodes used on a drive",
@ -219,35 +218,32 @@ func updateResourceMetrics(subSys MetricSubsystem, name MetricName, val float64,
resourceMetricsMap[subSys] = subsysMetrics
}
// updateDriveIOStats - Updates the drive IO stats by calculating the difference between the current
// and initial values. We cannot rely on host.Uptime here as it will not work in k8s environments, where
// it will return the pod's uptime but the disk metrics are always from the host (/proc/diskstats)
func updateDriveIOStats(currentStats madmin.DiskIOStats, initialStats madmin.DiskIOStats, labels map[string]string) {
// updateDriveIOStats - Updates the drive IO stats by calculating the difference between the current and latest updated values.
func updateDriveIOStats(currentStats madmin.DiskIOStats, latestStats madmin.DiskIOStats, labels map[string]string) {
sectorSize := uint64(512)
kib := float64(1 << 10)
uptime, _ := host.Uptime()
uptimeDiff := float64(uptime - initialUptime)
if uptimeDiff == 0 {
diffInSeconds := time.Now().UTC().Sub(lastDriveStatsRefresh).Seconds()
if diffInSeconds == 0 {
// too soon to update the stats
return
}
diffStats := madmin.DiskIOStats{
ReadIOs: currentStats.ReadIOs - initialStats.ReadIOs,
WriteIOs: currentStats.WriteIOs - initialStats.WriteIOs,
ReadTicks: currentStats.ReadTicks - initialStats.ReadTicks,
WriteTicks: currentStats.WriteTicks - initialStats.WriteTicks,
TotalTicks: currentStats.TotalTicks - initialStats.TotalTicks,
ReadSectors: currentStats.ReadSectors - initialStats.ReadSectors,
WriteSectors: currentStats.WriteSectors - initialStats.WriteSectors,
ReadIOs: currentStats.ReadIOs - latestStats.ReadIOs,
WriteIOs: currentStats.WriteIOs - latestStats.WriteIOs,
ReadTicks: currentStats.ReadTicks - latestStats.ReadTicks,
WriteTicks: currentStats.WriteTicks - latestStats.WriteTicks,
TotalTicks: currentStats.TotalTicks - latestStats.TotalTicks,
ReadSectors: currentStats.ReadSectors - latestStats.ReadSectors,
WriteSectors: currentStats.WriteSectors - latestStats.WriteSectors,
}
updateResourceMetrics(driveSubsystem, readsPerSec, float64(diffStats.ReadIOs)/uptimeDiff, labels, false)
updateResourceMetrics(driveSubsystem, readsPerSec, float64(diffStats.ReadIOs)/diffInSeconds, labels, false)
readKib := float64(diffStats.ReadSectors*sectorSize) / kib
updateResourceMetrics(driveSubsystem, readsKBPerSec, readKib/uptimeDiff, labels, false)
updateResourceMetrics(driveSubsystem, readsKBPerSec, readKib/diffInSeconds, labels, false)
updateResourceMetrics(driveSubsystem, writesPerSec, float64(diffStats.WriteIOs)/uptimeDiff, labels, false)
updateResourceMetrics(driveSubsystem, writesPerSec, float64(diffStats.WriteIOs)/diffInSeconds, labels, false)
writeKib := float64(diffStats.WriteSectors*sectorSize) / kib
updateResourceMetrics(driveSubsystem, writesKBPerSec, writeKib/uptimeDiff, labels, false)
updateResourceMetrics(driveSubsystem, writesKBPerSec, writeKib/diffInSeconds, labels, false)
rdAwait := 0.0
if diffStats.ReadIOs > 0 {
@ -260,18 +256,23 @@ func updateDriveIOStats(currentStats madmin.DiskIOStats, initialStats madmin.Dis
wrAwait = float64(diffStats.WriteTicks) / float64(diffStats.WriteIOs)
}
updateResourceMetrics(driveSubsystem, writesAwait, wrAwait, labels, false)
updateResourceMetrics(driveSubsystem, percUtil, float64(diffStats.TotalTicks)/(uptimeDiff*10), labels, false)
updateResourceMetrics(driveSubsystem, percUtil, float64(diffStats.TotalTicks)/(diffInSeconds*10), labels, false)
}
func collectDriveMetrics(m madmin.RealtimeMetrics) {
latestDriveStatsMu.Lock()
for d, dm := range m.ByDisk {
labels := map[string]string{"drive": d}
initialStats, ok := initialDriveStats[d]
latestStats, ok := latestDriveStats[d]
if !ok {
latestDriveStats[d] = dm.IOStats
continue
}
updateDriveIOStats(dm.IOStats, initialStats, labels)
updateDriveIOStats(dm.IOStats, latestStats, labels)
latestDriveStats[d] = dm.IOStats
}
lastDriveStatsRefresh = time.Now().UTC()
latestDriveStatsMu.Unlock()
globalLocalDrivesMu.RLock()
localDrives := cloneDrives(globalLocalDrives)
@ -361,29 +362,25 @@ func collectLocalResourceMetrics() {
collectDriveMetrics(m)
}
// populateInitialValues - populates the initial values
// for drive stats and host uptime
func populateInitialValues() {
initialDriveStatsMu.Lock()
func initLatestValues() {
m := collectLocalMetrics(madmin.MetricsDisk, collectMetricsOpts{
hosts: map[string]struct{}{
globalLocalNodeName: {},
},
})
initialDriveStats = map[string]madmin.DiskIOStats{}
latestDriveStatsMu.Lock()
latestDriveStats = map[string]madmin.DiskIOStats{}
for d, dm := range m.ByDisk {
initialDriveStats[d] = dm.IOStats
latestDriveStats[d] = dm.IOStats
}
initialUptime, _ = host.Uptime()
initialDriveStatsMu.Unlock()
lastDriveStatsRefresh = time.Now().UTC()
latestDriveStatsMu.Unlock()
}
// startResourceMetricsCollection - starts the job for collecting resource metrics
func startResourceMetricsCollection() {
populateInitialValues()
initLatestValues()
resourceMetricsMapMu.Lock()
resourceMetricsMap = map[MetricSubsystem]ResourceMetrics{}

View File

@ -40,15 +40,6 @@ type Info struct {
NRRequests uint64
}
// DevID is the drive major and minor ids
type DevID struct {
Major uint32
Minor uint32
}
// AllDrivesIOStats is map between drive devices and IO stats
type AllDrivesIOStats map[DevID]IOStats
// IOStats contains stats of a single drive
type IOStats struct {
ReadIOs uint64

View File

@ -48,7 +48,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}

View File

@ -48,7 +48,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}

View File

@ -22,7 +22,9 @@ package disk
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
@ -109,125 +111,63 @@ func GetInfo(path string, firstTime bool) (info Info, err error) {
return info, nil
}
const (
statsPath = "/proc/diskstats"
)
// GetDriveStats looks up the sysfs stat file of the block device identified
// by major:minor and returns the IO counters parsed from it.
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
	statFile := fmt.Sprintf("/sys/dev/block/%v:%v/stat", major, minor)
	return readDriveStats(statFile)
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
proc, err := os.Open(statsPath)
// readDriveStats parses the block-device stat file at statsFile into IOStats.
//
// The counters are mapped positionally following the layout documented in
// https://www.kernel.org/doc/Documentation/block/stat.txt:
//
//	 0..10  read/write IOs, merges, sectors, ticks, in-flight, io ticks, queue time
//	11..14  discard IOs, merges, sectors, ticks   (only on newer kernels)
//	15..16  flush IOs, flush ticks                (only on newer kernels)
//
// An error is returned when the file cannot be read or parsed, or when it
// holds fewer than the 11 fields every kernel is expected to provide. On error
// the returned IOStats is the zero value.
func readDriveStats(statsFile string) (iostats IOStats, err error) {
	stats, err := readStat(statsFile)
	if err != nil {
		return IOStats{}, err
	}
	if len(stats) < 11 {
		return IOStats{}, fmt.Errorf("found invalid format while reading %v", statsFile)
	}
	// refer https://www.kernel.org/doc/Documentation/block/stat.txt
	iostats = IOStats{
		ReadIOs:      stats[0],
		ReadMerges:   stats[1],
		ReadSectors:  stats[2],
		ReadTicks:    stats[3],
		WriteIOs:     stats[4],
		WriteMerges:  stats[5],
		WriteSectors: stats[6],
		WriteTicks:   stats[7],
		CurrentIOs:   stats[8],
		TotalTicks:   stats[9],
		ReqTicks:     stats[10],
	}
	// Only the first 11 fields are guaranteed; the optional groups below are
	// set only when the file actually carries them, otherwise they stay zero.
	if len(stats) > 14 {
		iostats.DiscardIOs = stats[11]
		iostats.DiscardMerges = stats[12]
		iostats.DiscardSectors = stats[13]
		iostats.DiscardTicks = stats[14]
	}
	// Flush counters follow the discard group. They were previously dropped
	// even when present in the file.
	if len(stats) > 16 {
		iostats.FlushIOs = stats[15]
		iostats.FlushTicks = stats[16]
	}
	return iostats, nil
}
func readStat(fileName string) (stats []uint64, err error) {
file, err := os.Open(fileName)
if err != nil {
return nil, err
}
defer proc.Close()
defer file.Close()
ret := make(AllDrivesIOStats)
sc := bufio.NewScanner(proc)
for sc.Scan() {
line := sc.Text()
fields := strings.Fields(line)
if len(fields) < 11 {
continue
}
var err error
var ds IOStats
ds.ReadIOs, err = strconv.ParseUint((fields[3]), 10, 64)
if err != nil {
return ret, err
}
ds.ReadMerges, err = strconv.ParseUint((fields[4]), 10, 64)
if err != nil {
return ret, err
}
ds.ReadSectors, err = strconv.ParseUint((fields[5]), 10, 64)
if err != nil {
return ret, err
}
ds.ReadTicks, err = strconv.ParseUint((fields[6]), 10, 64)
if err != nil {
return ret, err
}
ds.WriteIOs, err = strconv.ParseUint((fields[7]), 10, 64)
if err != nil {
return ret, err
}
ds.WriteMerges, err = strconv.ParseUint((fields[8]), 10, 64)
if err != nil {
return ret, err
}
ds.WriteSectors, err = strconv.ParseUint((fields[9]), 10, 64)
if err != nil {
return ret, err
}
ds.WriteTicks, err = strconv.ParseUint((fields[10]), 10, 64)
if err != nil {
return ret, err
}
if len(fields) > 11 {
ds.CurrentIOs, err = strconv.ParseUint((fields[11]), 10, 64)
if err != nil {
return ret, err
}
ds.TotalTicks, err = strconv.ParseUint((fields[12]), 10, 64)
if err != nil {
return ret, err
}
ds.ReqTicks, err = strconv.ParseUint((fields[13]), 10, 64)
if err != nil {
return ret, err
}
}
if len(fields) > 14 {
ds.DiscardIOs, err = strconv.ParseUint((fields[14]), 10, 64)
if err != nil {
return ret, err
}
ds.DiscardMerges, err = strconv.ParseUint((fields[15]), 10, 64)
if err != nil {
return ret, err
}
ds.DiscardSectors, err = strconv.ParseUint((fields[16]), 10, 64)
if err != nil {
return ret, err
}
ds.DiscardTicks, err = strconv.ParseUint((fields[17]), 10, 64)
if err != nil {
return ret, err
}
}
if len(fields) > 18 {
ds.FlushIOs, err = strconv.ParseUint((fields[18]), 10, 64)
if err != nil {
return ret, err
}
ds.FlushTicks, err = strconv.ParseUint((fields[19]), 10, 64)
if err != nil {
return ret, err
}
}
major, err := strconv.ParseUint((fields[0]), 10, 32)
if err != nil {
return ret, err
}
minor, err := strconv.ParseUint((fields[1]), 10, 32)
if err != nil {
return ret, err
}
ret[DevID{uint32(major), uint32(minor)}] = ds
}
if err := sc.Err(); err != nil {
s, err := bufio.NewReader(file).ReadString('\n')
if err != nil && !errors.Is(err, io.EOF) {
return nil, err
}
statLine := strings.TrimSpace(s)
for _, token := range strings.Fields(statLine) {
ui64, err := strconv.ParseUint(token, 10, 64)
if err != nil {
return nil, err
}
stats = append(stats, ui64)
}
return ret, nil
return stats, nil
}

View File

@ -83,7 +83,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}

View File

@ -83,7 +83,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}

View File

@ -48,7 +48,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}

View File

@ -48,7 +48,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}

View File

@ -48,7 +48,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}

130
internal/disk/stat_test.go Normal file
View File

@ -0,0 +1,130 @@
//go:build linux && !s390x && !arm && !386
// +build linux,!s390x,!arm,!386
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package disk
import (
"os"
"reflect"
"runtime"
"testing"
)
// TestReadDriveStats writes sample stat lines to a temporary file and checks
// that readDriveStats turns each of them into the expected IOStats, or fails
// when the line has too few fields.
//
// The cases cover a 17-field line (with discard and flush counters), a
// 15-field line (with discard counters only), the minimal 11-field line, and
// a truncated 4-field line that must be rejected.
func TestReadDriveStats(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("skipping this test in windows")
	}
	testCases := []struct {
		stat            string
		expectedIOStats IOStats
		expectErr       bool
	}{
		{
			stat: "1432553 420084 66247626 2398227 7077314 8720147 157049224 7469810 0 7580552 9869354 46037 0 41695120 1315 0 0",
			expectedIOStats: IOStats{
				ReadIOs:        1432553,
				ReadMerges:     420084,
				ReadSectors:    66247626,
				ReadTicks:      2398227,
				WriteIOs:       7077314,
				WriteMerges:    8720147,
				WriteSectors:   157049224,
				WriteTicks:     7469810,
				CurrentIOs:     0,
				TotalTicks:     7580552,
				ReqTicks:       9869354,
				DiscardIOs:     46037,
				DiscardMerges:  0,
				DiscardSectors: 41695120,
				DiscardTicks:   1315,
				FlushIOs:       0,
				FlushTicks:     0,
			},
			expectErr: false,
		},
		{
			stat: "1432553 420084 66247626 2398227 7077314 8720147 157049224 7469810 0 7580552 9869354 46037 0 41695120 1315",
			expectedIOStats: IOStats{
				ReadIOs:        1432553,
				ReadMerges:     420084,
				ReadSectors:    66247626,
				ReadTicks:      2398227,
				WriteIOs:       7077314,
				WriteMerges:    8720147,
				WriteSectors:   157049224,
				WriteTicks:     7469810,
				CurrentIOs:     0,
				TotalTicks:     7580552,
				ReqTicks:       9869354,
				DiscardIOs:     46037,
				DiscardMerges:  0,
				DiscardSectors: 41695120,
				DiscardTicks:   1315,
			},
			expectErr: false,
		},
		{
			stat: "1432553 420084 66247626 2398227 7077314 8720147 157049224 7469810 0 7580552 9869354",
			expectedIOStats: IOStats{
				ReadIOs:      1432553,
				ReadMerges:   420084,
				ReadSectors:  66247626,
				ReadTicks:    2398227,
				WriteIOs:     7077314,
				WriteMerges:  8720147,
				WriteSectors: 157049224,
				WriteTicks:   7469810,
				CurrentIOs:   0,
				TotalTicks:   7580552,
				ReqTicks:     9869354,
			},
			expectErr: false,
		},
		{
			stat:            "1432553 420084 66247626 2398227",
			expectedIOStats: IOStats{},
			expectErr:       true,
		},
	}
	for _, testCase := range testCases {
		testCase := testCase
		t.Run("", func(t *testing.T) {
			// Create the fixture inside t.TempDir so it is removed automatically
			// when the subtest ends instead of leaking into the system temp dir.
			tmpfile, err := os.CreateTemp(t.TempDir(), "testfile")
			if err != nil {
				// Fatal, not Error: tmpfile is nil here and must not be used.
				t.Fatal(err)
			}
			if _, err := tmpfile.WriteString(testCase.stat); err != nil {
				tmpfile.Close()
				t.Fatal(err)
			}
			if err := tmpfile.Close(); err != nil {
				t.Fatal(err)
			}

			iostats, err := readDriveStats(tmpfile.Name())
			if err != nil && !testCase.expectErr {
				t.Fatalf("unexpected err; %v", err)
			}
			if testCase.expectErr && err == nil {
				t.Fatal("expected to fail but err is nil")
			}
			if !reflect.DeepEqual(iostats, testCase.expectedIOStats) {
				t.Fatalf("expected iostats: %+v but got %+v", testCase.expectedIOStats, iostats)
			}
		})
	}
}

View File

@ -108,7 +108,7 @@ func GetInfo(path string, _ bool) (info Info, err error) {
return info, nil
}
// GetAllDrivesIOStats returns IO stats of all drives found in the machine
func GetAllDrivesIOStats() (info AllDrivesIOStats, err error) {
return nil, errors.New("operation unsupported")
// GetDriveStats returns IO stats of the drive by its major:minor
func GetDriveStats(major, minor uint32) (iostats IOStats, err error) {
return IOStats{}, errors.New("operation unsupported")
}