Merge pull request #4 from treydock/memsw

Replace the derived swap metrics with memsw metrics that expose the raw memory+swap values
This commit is contained in:
treydock 2020-02-27 11:39:12 -05:00 committed by GitHub
commit 720706e742
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 36 deletions

View File

@ -54,9 +54,9 @@ cgroup_info{cgroup="/user.slice/user-20821.slice",uid="20821",username="tdockend
cgroup_memory_fail_count{cgroup="/user.slice/user-20821.slice"} 0
cgroup_memory_total_bytes{cgroup="/user.slice/user-20821.slice"} 6.8719476736e+10
cgroup_memory_used_bytes{cgroup="/user.slice/user-20821.slice"} 6.90176e+06
cgroup_swap_fail_count{cgroup="/user.slice/user-20821.slice"} 0
cgroup_swap_total_bytes{cgroup="/user.slice/user-20821.slice"} 9.223371968135295e+18
cgroup_swap_used_bytes{cgroup="/user.slice/user-20821.slice"} 0
cgroup_memsw_fail_count{cgroup="/user.slice/user-20821.slice"} 0
cgroup_memsw_total_bytes{cgroup="/user.slice/user-20821.slice"} 9.223371968135295e+18
cgroup_memsw_used_bytes{cgroup="/user.slice/user-20821.slice"} 0
```
Example of metrics exposed by this exporter when looking at `/slurm` paths:
@ -70,9 +70,9 @@ cgroup_info{cgroup="/slurm/uid_20821/job_12",jobid="12",uid="20821",username="td
cgroup_memory_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
cgroup_memory_total_bytes{cgroup="/slurm/uid_20821/job_12"} 2.147483648e+09
cgroup_memory_used_bytes{cgroup="/slurm/uid_20821/job_12"} 315392
cgroup_swap_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
cgroup_swap_total_bytes{cgroup="/slurm/uid_20821/job_12"} 2.147483648e+09
cgroup_swap_used_bytes{cgroup="/slurm/uid_20821/job_12"} 315392
cgroup_memsw_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
cgroup_memsw_total_bytes{cgroup="/slurm/uid_20821/job_12"} 2.147483648e+09
cgroup_memsw_used_bytes{cgroup="/slurm/uid_20821/job_12"} 315392
```
Example of metrics exposed by this exporter when looking at `/torque` paths:
@ -86,7 +86,7 @@ cgroup_info{cgroup="/torque/1182958.batch.example.com",jobid="1182958",uid="",us
cgroup_memory_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
cgroup_memory_total_bytes{cgroup="/torque/1182958.batch.example.com"} 1.96755132416e+11
cgroup_memory_used_bytes{cgroup="/torque/1182958.batch.example.com"} 5.3434466304e+10
cgroup_swap_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
cgroup_swap_total_bytes{cgroup="/torque/1182958.batch.example.com"} 1.96755132416e+11
cgroup_swap_used_bytes{cgroup="/torque/1182958.batch.example.com"} 5.3434466304e+10
cgroup_memsw_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
cgroup_memsw_total_bytes{cgroup="/torque/1182958.batch.example.com"} 1.96755132416e+11
cgroup_memsw_used_bytes{cgroup="/torque/1182958.batch.example.com"} 5.3434466304e+10
```

View File

@ -53,9 +53,9 @@ type CgroupMetric struct {
memoryUsed float64
memoryTotal float64
memoryFailCount float64
swapUsed float64
swapTotal float64
swapFailCount float64
memswUsed float64
memswTotal float64
memswFailCount float64
userslice bool
job bool
uid string
@ -74,9 +74,9 @@ type Exporter struct {
memoryUsed *prometheus.Desc
memoryTotal *prometheus.Desc
memoryFailCount *prometheus.Desc
swapUsed *prometheus.Desc
swapTotal *prometheus.Desc
swapFailCount *prometheus.Desc
memswUsed *prometheus.Desc
memswTotal *prometheus.Desc
memswFailCount *prometheus.Desc
info *prometheus.Desc
}
@ -229,11 +229,11 @@ func NewExporter(paths []string) *Exporter {
"Memory total given to cgroup in bytes", []string{"cgroup"}, nil),
memoryFailCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "fail_count"),
"Memory fail count", []string{"cgroup"}, nil),
swapUsed: prometheus.NewDesc(prometheus.BuildFQName(namespace, "swap", "used_bytes"),
memswUsed: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memsw", "used_bytes"),
"Swap used in bytes", []string{"cgroup"}, nil),
swapTotal: prometheus.NewDesc(prometheus.BuildFQName(namespace, "swap", "total_bytes"),
memswTotal: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memsw", "total_bytes"),
"Swap total given to cgroup in bytes", []string{"cgroup"}, nil),
swapFailCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "swap", "fail_count"),
memswFailCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memsw", "fail_count"),
"Swap fail count", []string{"cgroup"}, nil),
info: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "info"),
"User slice information", []string{"cgroup", "username", "uid", "jobid"}, nil),
@ -290,9 +290,9 @@ func (e *Exporter) collect() ([]CgroupMetric, error) {
metric.memoryUsed = float64(stats.Memory.Usage.Usage)
metric.memoryTotal = float64(stats.Memory.Usage.Limit)
metric.memoryFailCount = float64(stats.Memory.Usage.Failcnt)
metric.swapUsed = float64(stats.Memory.Swap.Usage) - metric.memoryUsed
metric.swapTotal = float64(stats.Memory.Swap.Limit) - metric.memoryTotal
metric.swapFailCount = float64(stats.Memory.Swap.Failcnt) - metric.memoryFailCount
metric.memswUsed = float64(stats.Memory.Swap.Usage)
metric.memswTotal = float64(stats.Memory.Swap.Limit)
metric.memswFailCount = float64(stats.Memory.Swap.Failcnt)
if cpus, err := getCPUs(name); err == nil {
metric.cpus = cpus
}
@ -312,9 +312,9 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
ch <- e.memoryUsed
ch <- e.memoryTotal
ch <- e.memoryFailCount
ch <- e.swapUsed
ch <- e.swapTotal
ch <- e.swapFailCount
ch <- e.memswUsed
ch <- e.memswTotal
ch <- e.memswFailCount
}
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
@ -330,9 +330,9 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(e.memoryUsed, prometheus.GaugeValue, m.memoryUsed, m.name)
ch <- prometheus.MustNewConstMetric(e.memoryTotal, prometheus.GaugeValue, m.memoryTotal, m.name)
ch <- prometheus.MustNewConstMetric(e.memoryFailCount, prometheus.GaugeValue, m.memoryFailCount, m.name)
ch <- prometheus.MustNewConstMetric(e.swapUsed, prometheus.GaugeValue, m.swapUsed, m.name)
ch <- prometheus.MustNewConstMetric(e.swapTotal, prometheus.GaugeValue, m.swapTotal, m.name)
ch <- prometheus.MustNewConstMetric(e.swapFailCount, prometheus.GaugeValue, m.swapFailCount, m.name)
ch <- prometheus.MustNewConstMetric(e.memswUsed, prometheus.GaugeValue, m.memswUsed, m.name)
ch <- prometheus.MustNewConstMetric(e.memswTotal, prometheus.GaugeValue, m.memswTotal, m.name)
ch <- prometheus.MustNewConstMetric(e.memswFailCount, prometheus.GaugeValue, m.memswFailCount, m.name)
if m.userslice || m.job {
ch <- prometheus.MustNewConstMetric(e.info, prometheus.GaugeValue, 1, m.name, m.username, m.uid, m.jobid)
}

View File

@ -102,13 +102,13 @@ func TestCollectUserSlice(t *testing.T) {
if val := metrics[0].memoryFailCount; val != 0 {
t.Errorf("Unexpected value for memoryFailCount, got %v", val)
}
if val := metrics[0].swapUsed; val != 0 {
if val := metrics[0].memswUsed; val != 8081408 {
t.Errorf("Unexpected value for swapUsed, got %v", val)
}
if val := metrics[0].swapTotal; val != 9223371968135295000 {
if val := metrics[0].memswTotal; val != 9.223372036854772e+18 {
t.Errorf("Unexpected value for swapTotal, got %v", val)
}
if val := metrics[0].swapFailCount; val != 0 {
if val := metrics[0].memswFailCount; val != 0 {
t.Errorf("Unexpected value for swapFailCount, got %v", val)
}
if val := metrics[0].uid; val != "20821" {
@ -149,13 +149,13 @@ func TestCollectSLURM(t *testing.T) {
if val := metrics[0].memoryFailCount; val != 0 {
t.Errorf("Unexpected value for memoryFailCount, got %v", val)
}
if val := metrics[0].swapUsed; val != 0 {
if val := metrics[0].memswUsed; val != 356352 {
t.Errorf("Unexpected value for swapUsed, got %v", val)
}
if val := metrics[0].swapTotal; val != 0 {
if val := metrics[0].memswTotal; val != 2147483648 {
t.Errorf("Unexpected value for swapTotal, got %v", val)
}
if val := metrics[0].swapFailCount; val != 0 {
if val := metrics[0].memswFailCount; val != 0 {
t.Errorf("Unexpected value for swapFailCount, got %v", val)
}
if val := metrics[0].uid; val != "20821" {
@ -199,13 +199,13 @@ func TestCollectTorque(t *testing.T) {
if val := metrics[0].memoryFailCount; val != 0 {
t.Errorf("Unexpected value for memoryFailCount, got %v", val)
}
if val := metrics[0].swapUsed; val != 0 {
if val := metrics[0].memswUsed; val != 82553999360 {
t.Errorf("Unexpected value for swapUsed, got %v", val)
}
if val := metrics[0].swapTotal; val != 0 {
if val := metrics[0].memswTotal; val != 196755132416 {
t.Errorf("Unexpected value for swapTotal, got %v", val)
}
if val := metrics[0].swapFailCount; val != 0 {
if val := metrics[0].memswFailCount; val != 0 {
t.Errorf("Unexpected value for swapFailCount, got %v", val)
}
if val := metrics[0].uid; val != "" {