Merge pull request #4 from treydock/memsw
Replace swap metrics with memsw to describe the raw values
This commit is contained in:
commit
720706e742
18
README.md
18
README.md
|
@ -54,9 +54,9 @@ cgroup_info{cgroup="/user.slice/user-20821.slice",uid="20821",username="tdockend
|
|||
cgroup_memory_fail_count{cgroup="/user.slice/user-20821.slice"} 0
|
||||
cgroup_memory_total_bytes{cgroup="/user.slice/user-20821.slice"} 6.8719476736e+10
|
||||
cgroup_memory_used_bytes{cgroup="/user.slice/user-20821.slice"} 6.90176e+06
|
||||
cgroup_swap_fail_count{cgroup="/user.slice/user-20821.slice"} 0
|
||||
cgroup_swap_total_bytes{cgroup="/user.slice/user-20821.slice"} 9.223371968135295e+18
|
||||
cgroup_swap_used_bytes{cgroup="/user.slice/user-20821.slice"} 0
|
||||
cgroup_memsw_fail_count{cgroup="/user.slice/user-20821.slice"} 0
|
||||
cgroup_memsw_total_bytes{cgroup="/user.slice/user-20821.slice"} 9.223371968135295e+18
|
||||
cgroup_memsw_used_bytes{cgroup="/user.slice/user-20821.slice"} 0
|
||||
```
|
||||
|
||||
Example of metrics exposed by this exporter when looking at `/slurm` paths:
|
||||
|
@ -70,9 +70,9 @@ cgroup_info{cgroup="/slurm/uid_20821/job_12",jobid="12",uid="20821",username="td
|
|||
cgroup_memory_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
|
||||
cgroup_memory_total_bytes{cgroup="/slurm/uid_20821/job_12"} 2.147483648e+09
|
||||
cgroup_memory_used_bytes{cgroup="/slurm/uid_20821/job_12"} 315392
|
||||
cgroup_swap_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
|
||||
cgroup_swap_total_bytes{cgroup="/slurm/uid_20821/job_12"} 2.147483648e+09
|
||||
cgroup_swap_used_bytes{cgroup="/slurm/uid_20821/job_12"} 315392
|
||||
cgroup_memsw_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
|
||||
cgroup_memsw_total_bytes{cgroup="/slurm/uid_20821/job_12"} 2.147483648e+09
|
||||
cgroup_memsw_used_bytes{cgroup="/slurm/uid_20821/job_12"} 315392
|
||||
```
|
||||
|
||||
Example of metrics exposed by this exporter when looking at `/torque` paths:
|
||||
|
@ -86,7 +86,7 @@ cgroup_info{cgroup="/torque/1182958.batch.example.com",jobid="1182958",uid="",us
|
|||
cgroup_memory_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
|
||||
cgroup_memory_total_bytes{cgroup="/torque/1182958.batch.example.com"} 1.96755132416e+11
|
||||
cgroup_memory_used_bytes{cgroup="/torque/1182958.batch.example.com"} 5.3434466304e+10
|
||||
cgroup_swap_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
|
||||
cgroup_swap_total_bytes{cgroup="/torque/1182958.batch.example.com"} 1.96755132416e+11
|
||||
cgroup_swap_used_bytes{cgroup="/torque/1182958.batch.example.com"} 5.3434466304e+10
|
||||
cgroup_memsw_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
|
||||
cgroup_memsw_total_bytes{cgroup="/torque/1182958.batch.example.com"} 1.96755132416e+11
|
||||
cgroup_memsw_used_bytes{cgroup="/torque/1182958.batch.example.com"} 5.3434466304e+10
|
||||
```
|
||||
|
|
|
@ -53,9 +53,9 @@ type CgroupMetric struct {
|
|||
memoryUsed float64
|
||||
memoryTotal float64
|
||||
memoryFailCount float64
|
||||
swapUsed float64
|
||||
swapTotal float64
|
||||
swapFailCount float64
|
||||
memswUsed float64
|
||||
memswTotal float64
|
||||
memswFailCount float64
|
||||
userslice bool
|
||||
job bool
|
||||
uid string
|
||||
|
@ -74,9 +74,9 @@ type Exporter struct {
|
|||
memoryUsed *prometheus.Desc
|
||||
memoryTotal *prometheus.Desc
|
||||
memoryFailCount *prometheus.Desc
|
||||
swapUsed *prometheus.Desc
|
||||
swapTotal *prometheus.Desc
|
||||
swapFailCount *prometheus.Desc
|
||||
memswUsed *prometheus.Desc
|
||||
memswTotal *prometheus.Desc
|
||||
memswFailCount *prometheus.Desc
|
||||
info *prometheus.Desc
|
||||
}
|
||||
|
||||
|
@ -229,11 +229,11 @@ func NewExporter(paths []string) *Exporter {
|
|||
"Memory total given to cgroup in bytes", []string{"cgroup"}, nil),
|
||||
memoryFailCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "fail_count"),
|
||||
"Memory fail count", []string{"cgroup"}, nil),
|
||||
swapUsed: prometheus.NewDesc(prometheus.BuildFQName(namespace, "swap", "used_bytes"),
|
||||
memswUsed: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memsw", "used_bytes"),
|
||||
"Swap used in bytes", []string{"cgroup"}, nil),
|
||||
swapTotal: prometheus.NewDesc(prometheus.BuildFQName(namespace, "swap", "total_bytes"),
|
||||
memswTotal: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memsw", "total_bytes"),
|
||||
"Swap total given to cgroup in bytes", []string{"cgroup"}, nil),
|
||||
swapFailCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "swap", "fail_count"),
|
||||
memswFailCount: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memsw", "fail_count"),
|
||||
"Swap fail count", []string{"cgroup"}, nil),
|
||||
info: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "info"),
|
||||
"User slice information", []string{"cgroup", "username", "uid", "jobid"}, nil),
|
||||
|
@ -290,9 +290,9 @@ func (e *Exporter) collect() ([]CgroupMetric, error) {
|
|||
metric.memoryUsed = float64(stats.Memory.Usage.Usage)
|
||||
metric.memoryTotal = float64(stats.Memory.Usage.Limit)
|
||||
metric.memoryFailCount = float64(stats.Memory.Usage.Failcnt)
|
||||
metric.swapUsed = float64(stats.Memory.Swap.Usage) - metric.memoryUsed
|
||||
metric.swapTotal = float64(stats.Memory.Swap.Limit) - metric.memoryTotal
|
||||
metric.swapFailCount = float64(stats.Memory.Swap.Failcnt) - metric.memoryFailCount
|
||||
metric.memswUsed = float64(stats.Memory.Swap.Usage)
|
||||
metric.memswTotal = float64(stats.Memory.Swap.Limit)
|
||||
metric.memswFailCount = float64(stats.Memory.Swap.Failcnt)
|
||||
if cpus, err := getCPUs(name); err == nil {
|
||||
metric.cpus = cpus
|
||||
}
|
||||
|
@ -312,9 +312,9 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
|||
ch <- e.memoryUsed
|
||||
ch <- e.memoryTotal
|
||||
ch <- e.memoryFailCount
|
||||
ch <- e.swapUsed
|
||||
ch <- e.swapTotal
|
||||
ch <- e.swapFailCount
|
||||
ch <- e.memswUsed
|
||||
ch <- e.memswTotal
|
||||
ch <- e.memswFailCount
|
||||
}
|
||||
|
||||
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||
|
@ -330,9 +330,9 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
|||
ch <- prometheus.MustNewConstMetric(e.memoryUsed, prometheus.GaugeValue, m.memoryUsed, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.memoryTotal, prometheus.GaugeValue, m.memoryTotal, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.memoryFailCount, prometheus.GaugeValue, m.memoryFailCount, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.swapUsed, prometheus.GaugeValue, m.swapUsed, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.swapTotal, prometheus.GaugeValue, m.swapTotal, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.swapFailCount, prometheus.GaugeValue, m.swapFailCount, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.memswUsed, prometheus.GaugeValue, m.memswUsed, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.memswTotal, prometheus.GaugeValue, m.memswTotal, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.memswFailCount, prometheus.GaugeValue, m.memswFailCount, m.name)
|
||||
if m.userslice || m.job {
|
||||
ch <- prometheus.MustNewConstMetric(e.info, prometheus.GaugeValue, 1, m.name, m.username, m.uid, m.jobid)
|
||||
}
|
||||
|
|
|
@ -102,13 +102,13 @@ func TestCollectUserSlice(t *testing.T) {
|
|||
if val := metrics[0].memoryFailCount; val != 0 {
|
||||
t.Errorf("Unexpected value for memoryFailCount, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapUsed; val != 0 {
|
||||
if val := metrics[0].memswUsed; val != 8081408 {
|
||||
t.Errorf("Unexpected value for swapUsed, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapTotal; val != 9223371968135295000 {
|
||||
if val := metrics[0].memswTotal; val != 9.223372036854772e+18 {
|
||||
t.Errorf("Unexpected value for swapTotal, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapFailCount; val != 0 {
|
||||
if val := metrics[0].memswFailCount; val != 0 {
|
||||
t.Errorf("Unexpected value for swapFailCount, got %v", val)
|
||||
}
|
||||
if val := metrics[0].uid; val != "20821" {
|
||||
|
@ -149,13 +149,13 @@ func TestCollectSLURM(t *testing.T) {
|
|||
if val := metrics[0].memoryFailCount; val != 0 {
|
||||
t.Errorf("Unexpected value for memoryFailCount, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapUsed; val != 0 {
|
||||
if val := metrics[0].memswUsed; val != 356352 {
|
||||
t.Errorf("Unexpected value for swapUsed, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapTotal; val != 0 {
|
||||
if val := metrics[0].memswTotal; val != 2147483648 {
|
||||
t.Errorf("Unexpected value for swapTotal, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapFailCount; val != 0 {
|
||||
if val := metrics[0].memswFailCount; val != 0 {
|
||||
t.Errorf("Unexpected value for swapFailCount, got %v", val)
|
||||
}
|
||||
if val := metrics[0].uid; val != "20821" {
|
||||
|
@ -199,13 +199,13 @@ func TestCollectTorque(t *testing.T) {
|
|||
if val := metrics[0].memoryFailCount; val != 0 {
|
||||
t.Errorf("Unexpected value for memoryFailCount, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapUsed; val != 0 {
|
||||
if val := metrics[0].memswUsed; val != 82553999360 {
|
||||
t.Errorf("Unexpected value for swapUsed, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapTotal; val != 0 {
|
||||
if val := metrics[0].memswTotal; val != 196755132416 {
|
||||
t.Errorf("Unexpected value for swapTotal, got %v", val)
|
||||
}
|
||||
if val := metrics[0].swapFailCount; val != 0 {
|
||||
if val := metrics[0].memswFailCount; val != 0 {
|
||||
t.Errorf("Unexpected value for swapFailCount, got %v", val)
|
||||
}
|
||||
if val := metrics[0].uid; val != "" {
|
||||
|
|
Loading…
Reference in New Issue