Fix error handling: record collection failures on CgroupMetric and emit collect_error from Collect()
This commit is contained in:
parent
903b0fd581
commit
a58b20c257
|
@ -61,6 +61,7 @@ type CgroupMetric struct {
|
||||||
uid string
|
uid string
|
||||||
username string
|
username string
|
||||||
jobid string
|
jobid string
|
||||||
|
err bool
|
||||||
}
|
}
|
||||||
|
|
||||||
type Exporter struct {
|
type Exporter struct {
|
||||||
|
@ -237,11 +238,11 @@ func NewExporter(paths []string) *Exporter {
|
||||||
info: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "info"),
|
info: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "info"),
|
||||||
"User slice information", []string{"cgroup", "username", "uid", "jobid"}, nil),
|
"User slice information", []string{"cgroup", "username", "uid", "jobid"}, nil),
|
||||||
collectError: prometheus.NewDesc(prometheus.BuildFQName(namespace, "exporter", "collect_error"),
|
collectError: prometheus.NewDesc(prometheus.BuildFQName(namespace, "exporter", "collect_error"),
|
||||||
"Indicates collection error, 0=no error, 1=error", []string{"path"}, nil),
|
"Indicates collection error, 0=no error, 1=error", []string{"cgroup"}, nil),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Exporter) collect(ch chan<- prometheus.Metric) ([]CgroupMetric, error) {
|
func (e *Exporter) collect() ([]CgroupMetric, error) {
|
||||||
var names []string
|
var names []string
|
||||||
var metrics []CgroupMetric
|
var metrics []CgroupMetric
|
||||||
for _, path := range e.paths {
|
for _, path := range e.paths {
|
||||||
|
@ -249,13 +250,15 @@ func (e *Exporter) collect(ch chan<- prometheus.Metric) ([]CgroupMetric, error)
|
||||||
control, err := cgroups.Load(subsystem, cgroups.StaticPath(path))
|
control, err := cgroups.Load(subsystem, cgroups.StaticPath(path))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error loading cgroup subsystem path %s: %s", path, err.Error())
|
log.Errorf("Error loading cgroup subsystem path %s: %s", path, err.Error())
|
||||||
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 1, path)
|
metric := CgroupMetric{name: path, err: true}
|
||||||
|
metrics = append(metrics, metric)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
processes, err := control.Processes(cgroups.Cpuacct, true)
|
processes, err := control.Processes(cgroups.Cpuacct, true)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error loading cgroup processes for path %s: %s", path, err.Error())
|
log.Errorf("Error loading cgroup processes for path %s: %s", path, err.Error())
|
||||||
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 1, path)
|
metric := CgroupMetric{name: path, err: true}
|
||||||
|
metrics = append(metrics, metric)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
log.Debugf("Found %d processes", len(processes))
|
log.Debugf("Found %d processes", len(processes))
|
||||||
|
@ -276,7 +279,8 @@ func (e *Exporter) collect(ch chan<- prometheus.Metric) ([]CgroupMetric, error)
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Failed to load cgroups for %s: %s", name, err.Error())
|
log.Errorf("Failed to load cgroups for %s: %s", name, err.Error())
|
||||||
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 1, name)
|
metric.err = true
|
||||||
|
metrics = append(metrics, metric)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
stats, _ := ctrl.Stat(cgroups.IgnoreNotExist)
|
stats, _ := ctrl.Stat(cgroups.IgnoreNotExist)
|
||||||
|
@ -295,7 +299,6 @@ func (e *Exporter) collect(ch chan<- prometheus.Metric) ([]CgroupMetric, error)
|
||||||
getInfo(name, &metric)
|
getInfo(name, &metric)
|
||||||
metrics = append(metrics, metric)
|
metrics = append(metrics, metric)
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 0, path)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return metrics, nil
|
return metrics, nil
|
||||||
|
@ -312,12 +315,14 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
||||||
ch <- e.swapUsed
|
ch <- e.swapUsed
|
||||||
ch <- e.swapTotal
|
ch <- e.swapTotal
|
||||||
ch <- e.swapFailCount
|
ch <- e.swapFailCount
|
||||||
ch <- e.collectError
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||||
metrics, _ := e.collect(ch)
|
metrics, _ := e.collect()
|
||||||
for _, m := range metrics {
|
for _, m := range metrics {
|
||||||
|
if m.err {
|
||||||
|
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 1, m.name)
|
||||||
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(e.cpuUser, prometheus.GaugeValue, m.cpuUser, m.name)
|
ch <- prometheus.MustNewConstMetric(e.cpuUser, prometheus.GaugeValue, m.cpuUser, m.name)
|
||||||
ch <- prometheus.MustNewConstMetric(e.cpuSystem, prometheus.GaugeValue, m.cpuSystem, m.name)
|
ch <- prometheus.MustNewConstMetric(e.cpuSystem, prometheus.GaugeValue, m.cpuSystem, m.name)
|
||||||
ch <- prometheus.MustNewConstMetric(e.cpuTotal, prometheus.GaugeValue, m.cpuTotal, m.name)
|
ch <- prometheus.MustNewConstMetric(e.cpuTotal, prometheus.GaugeValue, m.cpuTotal, m.name)
|
||||||
|
|
|
@ -14,7 +14,6 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
|
||||||
"github.com/prometheus/common/log"
|
"github.com/prometheus/common/log"
|
||||||
kingpin "gopkg.in/alecthomas/kingpin.v2"
|
kingpin "gopkg.in/alecthomas/kingpin.v2"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
@ -22,10 +21,6 @@ import (
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
|
||||||
ch = make(chan prometheus.Metric)
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParseCpuSet(t *testing.T) {
|
func TestParseCpuSet(t *testing.T) {
|
||||||
if cpus, err := parseCpuSet("0-2"); err != nil {
|
if cpus, err := parseCpuSet("0-2"); err != nil {
|
||||||
t.Errorf("Unexpected error: %s", err.Error())
|
t.Errorf("Unexpected error: %s", err.Error())
|
||||||
|
@ -54,7 +49,7 @@ func TestCollectUserSlice(t *testing.T) {
|
||||||
cgroupRoot = &fixture
|
cgroupRoot = &fixture
|
||||||
|
|
||||||
exporter := NewExporter([]string{"/user.slice"})
|
exporter := NewExporter([]string{"/user.slice"})
|
||||||
metrics, err := exporter.collect(ch)
|
metrics, err := exporter.collect()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Unexpected error: %s", err.Error())
|
t.Errorf("Unexpected error: %s", err.Error())
|
||||||
return
|
return
|
||||||
|
@ -109,7 +104,7 @@ func TestCollectSLURM(t *testing.T) {
|
||||||
cgroupRoot = &fixture
|
cgroupRoot = &fixture
|
||||||
|
|
||||||
exporter := NewExporter([]string{"/slurm"})
|
exporter := NewExporter([]string{"/slurm"})
|
||||||
metrics, err := exporter.collect(ch)
|
metrics, err := exporter.collect()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Unexpected error: %s", err.Error())
|
t.Errorf("Unexpected error: %s", err.Error())
|
||||||
return
|
return
|
||||||
|
@ -167,7 +162,7 @@ func TestCollectTorque(t *testing.T) {
|
||||||
cgroupRoot = &fixture
|
cgroupRoot = &fixture
|
||||||
|
|
||||||
exporter := NewExporter([]string{"/torque"})
|
exporter := NewExporter([]string{"/torque"})
|
||||||
metrics, err := exporter.collect(ch)
|
metrics, err := exporter.collect()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Unexpected error: %s", err.Error())
|
t.Errorf("Unexpected error: %s", err.Error())
|
||||||
return
|
return
|
||||||
|
|
Loading…
Reference in New Issue