Add metric to indicate collect failures
This commit is contained in:
parent
77f32e859b
commit
9bfac687d6
|
@ -65,6 +65,7 @@ type CgroupMetric struct {
|
|||
|
||||
type Exporter struct {
|
||||
paths []string
|
||||
collectError *prometheus.Desc
|
||||
cpuUser *prometheus.Desc
|
||||
cpuSystem *prometheus.Desc
|
||||
cpuTotal *prometheus.Desc
|
||||
|
@ -215,6 +216,8 @@ func getName(p cgroups.Process, path string) (string, error) {
|
|||
func NewExporter(paths []string) *Exporter {
|
||||
return &Exporter{
|
||||
paths: paths,
|
||||
collectError: prometheus.NewDesc(prometheus.BuildFQName(namespace, "exporter", "collect_error"),
|
||||
"Indicates exporter error, 0=no error, 1=error", []string{"path", "error"}, nil),
|
||||
cpuUser: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "user_seconds"),
|
||||
"Cumalitive CPU user seconds for cgroup", []string{"cgroup"}, nil),
|
||||
cpuSystem: prometheus.NewDesc(prometheus.BuildFQName(namespace, "cpu", "kernel_seconds"),
|
||||
|
@ -244,7 +247,7 @@ func NewExporter(paths []string) *Exporter {
|
|||
}
|
||||
}
|
||||
|
||||
func (e *Exporter) collect() ([]CgroupMetric, error) {
|
||||
func (e *Exporter) collect(ch chan<- prometheus.Metric) ([]CgroupMetric, error) {
|
||||
var names []string
|
||||
var metrics []CgroupMetric
|
||||
for _, path := range e.paths {
|
||||
|
@ -252,12 +255,14 @@ func (e *Exporter) collect() ([]CgroupMetric, error) {
|
|||
control, err := cgroups.Load(subsystem, cgroups.StaticPath(path))
|
||||
if err != nil {
|
||||
log.Errorf("Error loading cgroup subsystem path %s: %s", path, err.Error())
|
||||
return nil, err
|
||||
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 1, path, "load-subsystem")
|
||||
continue
|
||||
}
|
||||
processes, err := control.Processes(cgroups.Cpuacct, true)
|
||||
if err != nil {
|
||||
log.Errorf("Error loading cgroup processes for path %s: %s", path, err.Error())
|
||||
return nil, err
|
||||
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 1, path, "load-processes")
|
||||
continue
|
||||
}
|
||||
log.Debugf("Found %d processes", len(processes))
|
||||
for _, p := range processes {
|
||||
|
@ -277,7 +282,8 @@ func (e *Exporter) collect() ([]CgroupMetric, error) {
|
|||
})
|
||||
if err != nil {
|
||||
log.Errorf("Failed to load cgroups for %s: %s", name, err.Error())
|
||||
return nil, err
|
||||
ch <- prometheus.MustNewConstMetric(e.collectError, prometheus.GaugeValue, 1, name, "load-subsystem")
|
||||
continue
|
||||
}
|
||||
stats, _ := ctrl.Stat(cgroups.IgnoreNotExist)
|
||||
metric.cpuUser = float64(stats.CPU.Usage.User) / 1000000000.0
|
||||
|
@ -311,11 +317,12 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
|||
ch <- e.swapUsed
|
||||
ch <- e.swapTotal
|
||||
ch <- e.swapFailCount
|
||||
ch <- e.collectError
|
||||
ch <- e.success
|
||||
}
|
||||
|
||||
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||
metrics, err := e.collect()
|
||||
metrics, err := e.collect(ch)
|
||||
if err != nil {
|
||||
log.Errorf("Exporter error: %s", err.Error())
|
||||
ch <- prometheus.MustNewConstMetric(e.success, prometheus.GaugeValue, 0)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/common/log"
|
||||
kingpin "gopkg.in/alecthomas/kingpin.v2"
|
||||
"path/filepath"
|
||||
|
@ -21,6 +22,10 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
var (
|
||||
ch = make(chan prometheus.Metric)
|
||||
)
|
||||
|
||||
func TestParseCpuSet(t *testing.T) {
|
||||
if cpus, err := parseCpuSet("0-2"); err != nil {
|
||||
t.Errorf("Unexpected error: %s", err.Error())
|
||||
|
@ -49,7 +54,7 @@ func TestCollectUserSlice(t *testing.T) {
|
|||
cgroupRoot = &fixture
|
||||
|
||||
exporter := NewExporter([]string{"/user.slice"})
|
||||
metrics, err := exporter.collect()
|
||||
metrics, err := exporter.collect(ch)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %s", err.Error())
|
||||
return
|
||||
|
@ -104,7 +109,7 @@ func TestCollectSLURM(t *testing.T) {
|
|||
cgroupRoot = &fixture
|
||||
|
||||
exporter := NewExporter([]string{"/slurm"})
|
||||
metrics, err := exporter.collect()
|
||||
metrics, err := exporter.collect(ch)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %s", err.Error())
|
||||
return
|
||||
|
@ -162,7 +167,7 @@ func TestCollectTorque(t *testing.T) {
|
|||
cgroupRoot = &fixture
|
||||
|
||||
exporter := NewExporter([]string{"/torque"})
|
||||
metrics, err := exporter.collect()
|
||||
metrics, err := exporter.collect(ch)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error: %s", err.Error())
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue