commit
cd0c8008d8
|
@ -50,6 +50,7 @@ cgroup_cpu_system_seconds{cgroup="/user.slice/user-20821.slice"} 1.96
|
||||||
cgroup_cpu_total_seconds{cgroup="/user.slice/user-20821.slice"} 3.817500568
|
cgroup_cpu_total_seconds{cgroup="/user.slice/user-20821.slice"} 3.817500568
|
||||||
cgroup_cpu_user_seconds{cgroup="/user.slice/user-20821.slice"} 1.61
|
cgroup_cpu_user_seconds{cgroup="/user.slice/user-20821.slice"} 1.61
|
||||||
cgroup_cpus{cgroup="/user.slice/user-20821.slice"} 0
|
cgroup_cpus{cgroup="/user.slice/user-20821.slice"} 0
|
||||||
|
cgroup_cpu_info{cgroup="/user.slice/user-20821.slice",cpus=""} 1
|
||||||
cgroup_info{cgroup="/user.slice/user-20821.slice",uid="20821",username="tdockendorf",jobid=""} 1
|
cgroup_info{cgroup="/user.slice/user-20821.slice",uid="20821",username="tdockendorf",jobid=""} 1
|
||||||
cgroup_memory_cache_bytes{cgroup="/user.slice/user-20821.slice"} 2.322432e+06
|
cgroup_memory_cache_bytes{cgroup="/user.slice/user-20821.slice"} 2.322432e+06
|
||||||
cgroup_memory_fail_count{cgroup="/user.slice/user-20821.slice"} 0
|
cgroup_memory_fail_count{cgroup="/user.slice/user-20821.slice"} 0
|
||||||
|
@ -68,6 +69,7 @@ cgroup_cpu_system_seconds{cgroup="/slurm/uid_20821/job_12"} 0
|
||||||
cgroup_cpu_total_seconds{cgroup="/slurm/uid_20821/job_12"} 0.007840451
|
cgroup_cpu_total_seconds{cgroup="/slurm/uid_20821/job_12"} 0.007840451
|
||||||
cgroup_cpu_user_seconds{cgroup="/slurm/uid_20821/job_12"} 0
|
cgroup_cpu_user_seconds{cgroup="/slurm/uid_20821/job_12"} 0
|
||||||
cgroup_cpus{cgroup="/slurm/uid_20821/job_12"} 2
|
cgroup_cpus{cgroup="/slurm/uid_20821/job_12"} 2
|
||||||
|
cgroup_cpu_info{cgroup="/slurm/uid_20821/job_12",cpus="0,1"} 1
|
||||||
cgroup_info{cgroup="/slurm/uid_20821/job_12",jobid="12",uid="20821",username="tdockendorf"} 1
|
cgroup_info{cgroup="/slurm/uid_20821/job_12",jobid="12",uid="20821",username="tdockendorf"} 1
|
||||||
cgroup_memory_cache_bytes{cgroup="/slurm/uid_20821/job_12"} 4.096e+03
|
cgroup_memory_cache_bytes{cgroup="/slurm/uid_20821/job_12"} 4.096e+03
|
||||||
cgroup_memory_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
|
cgroup_memory_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
|
||||||
|
@ -85,7 +87,8 @@ Example of metrics exposed by this exporter when looking at `/torque` paths:
|
||||||
cgroup_cpu_system_seconds{cgroup="/torque/1182958.batch.example.com"} 26.35
|
cgroup_cpu_system_seconds{cgroup="/torque/1182958.batch.example.com"} 26.35
|
||||||
cgroup_cpu_total_seconds{cgroup="/torque/1182958.batch.example.com"} 939.568245515
|
cgroup_cpu_total_seconds{cgroup="/torque/1182958.batch.example.com"} 939.568245515
|
||||||
cgroup_cpu_user_seconds{cgroup="/torque/1182958.batch.example.com"} 915.61
|
cgroup_cpu_user_seconds{cgroup="/torque/1182958.batch.example.com"} 915.61
|
||||||
cgroup_cpus{cgroup="/torque/1182958.batch.example.com"} 40
|
cgroup_cpus{cgroup="/torque/1182958.batch.example.com"} 8
|
||||||
|
cgroup_cpu_info{cgroup="/torque/1182958.batch.example.com",cpus="0,1,2,3,4,5,6,7"} 1
|
||||||
cgroup_info{cgroup="/torque/1182958.batch.example.com",jobid="1182958",uid="",username=""} 1
|
cgroup_info{cgroup="/torque/1182958.batch.example.com",jobid="1182958",uid="",username=""} 1
|
||||||
cgroup_memory_cache_bytes{cgroup="/torque/1182958.batch.example.com"} 1.09678592e+08
|
cgroup_memory_cache_bytes{cgroup="/torque/1182958.batch.example.com"} 1.09678592e+08
|
||||||
cgroup_memory_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
|
cgroup_memory_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
|
||||||
|
|
|
@ -50,6 +50,7 @@ type CgroupMetric struct {
|
||||||
cpuSystem float64
|
cpuSystem float64
|
||||||
cpuTotal float64
|
cpuTotal float64
|
||||||
cpus int
|
cpus int
|
||||||
|
cpu_list string
|
||||||
memoryRSS float64
|
memoryRSS float64
|
||||||
memoryCache float64
|
memoryCache float64
|
||||||
memoryUsed float64
|
memoryUsed float64
|
||||||
|
@ -73,6 +74,7 @@ type Exporter struct {
|
||||||
cpuSystem *prometheus.Desc
|
cpuSystem *prometheus.Desc
|
||||||
cpuTotal *prometheus.Desc
|
cpuTotal *prometheus.Desc
|
||||||
cpus *prometheus.Desc
|
cpus *prometheus.Desc
|
||||||
|
cpu_info *prometheus.Desc
|
||||||
memoryRSS *prometheus.Desc
|
memoryRSS *prometheus.Desc
|
||||||
memoryCache *prometheus.Desc
|
memoryCache *prometheus.Desc
|
||||||
memoryUsed *prometheus.Desc
|
memoryUsed *prometheus.Desc
|
||||||
|
@ -109,47 +111,54 @@ func subsystem() ([]cgroups.Subsystem, error) {
|
||||||
return s, nil
|
return s, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getCPUs(name string) (int, error) {
|
func getCPUs(name string) ([]string, error) {
|
||||||
cpusPath := fmt.Sprintf("%s/cpuset%s/cpuset.cpus", *cgroupRoot, name)
|
cpusPath := fmt.Sprintf("%s/cpuset%s/cpuset.cpus", *cgroupRoot, name)
|
||||||
if !fileExists(cpusPath) {
|
if !fileExists(cpusPath) {
|
||||||
return 0, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
cpusData, err := ioutil.ReadFile(cpusPath)
|
cpusData, err := ioutil.ReadFile(cpusPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error reading %s: %s", cpusPath, err.Error())
|
log.Errorf("Error reading %s: %s", cpusPath, err.Error())
|
||||||
return 0, err
|
return nil, err
|
||||||
}
|
}
|
||||||
cpus, err := parseCpuSet(strings.TrimSuffix(string(cpusData), "\n"))
|
cpus, err := parseCpuSet(strings.TrimSuffix(string(cpusData), "\n"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Errorf("Error parsing cpu set %s", err.Error())
|
log.Errorf("Error parsing cpu set %s", err.Error())
|
||||||
return 0, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return cpus, nil
|
return cpus, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func parseCpuSet(cpuset string) (int, error) {
|
func parseCpuSet(cpuset string) ([]string, error) {
|
||||||
var cpus int
|
var cpus []string
|
||||||
|
var start, end int
|
||||||
|
var err error
|
||||||
if cpuset == "" {
|
if cpuset == "" {
|
||||||
return 0, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
ranges := strings.Split(cpuset, ",")
|
ranges := strings.Split(cpuset, ",")
|
||||||
for _, r := range ranges {
|
for _, r := range ranges {
|
||||||
boundaries := strings.Split(r, "-")
|
boundaries := strings.Split(r, "-")
|
||||||
if len(boundaries) == 1 {
|
if len(boundaries) == 1 {
|
||||||
cpus++
|
start, err = strconv.Atoi(boundaries[0])
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
end = start
|
||||||
} else if len(boundaries) == 2 {
|
} else if len(boundaries) == 2 {
|
||||||
start, err := strconv.Atoi(boundaries[0])
|
start, err = strconv.Atoi(boundaries[0])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return nil, err
|
||||||
}
|
}
|
||||||
end, err := strconv.Atoi(boundaries[1])
|
end, err = strconv.Atoi(boundaries[1])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return nil, err
|
||||||
}
|
|
||||||
for e := start; e <= end; e++ {
|
|
||||||
cpus++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for e := start; e <= end; e++ {
|
||||||
|
cpu := strconv.Itoa(e)
|
||||||
|
cpus = append(cpus, cpu)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return cpus, nil
|
return cpus, nil
|
||||||
}
|
}
|
||||||
|
@ -227,6 +236,8 @@ func NewExporter(paths []string) *Exporter {
|
||||||
"Cumalitive CPU total seconds for cgroup", []string{"cgroup"}, nil),
|
"Cumalitive CPU total seconds for cgroup", []string{"cgroup"}, nil),
|
||||||
cpus: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "cpus"),
|
cpus: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "cpus"),
|
||||||
"Number of CPUs in the cgroup", []string{"cgroup"}, nil),
|
"Number of CPUs in the cgroup", []string{"cgroup"}, nil),
|
||||||
|
cpu_info: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "cpu_info"),
|
||||||
|
"Information about the cgroup CPUs", []string{"cgroup", "cpus"}, nil),
|
||||||
memoryRSS: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "rss_bytes"),
|
memoryRSS: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "rss_bytes"),
|
||||||
"Memory RSS used in bytes", []string{"cgroup"}, nil),
|
"Memory RSS used in bytes", []string{"cgroup"}, nil),
|
||||||
memoryCache: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "cache_bytes"),
|
memoryCache: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "cache_bytes"),
|
||||||
|
@ -304,7 +315,8 @@ func (e *Exporter) collect() ([]CgroupMetric, error) {
|
||||||
metric.memswTotal = float64(stats.Memory.Swap.Limit)
|
metric.memswTotal = float64(stats.Memory.Swap.Limit)
|
||||||
metric.memswFailCount = float64(stats.Memory.Swap.Failcnt)
|
metric.memswFailCount = float64(stats.Memory.Swap.Failcnt)
|
||||||
if cpus, err := getCPUs(name); err == nil {
|
if cpus, err := getCPUs(name); err == nil {
|
||||||
metric.cpus = cpus
|
metric.cpus = len(cpus)
|
||||||
|
metric.cpu_list = strings.Join(cpus, ",")
|
||||||
}
|
}
|
||||||
getInfo(name, &metric)
|
getInfo(name, &metric)
|
||||||
metrics = append(metrics, metric)
|
metrics = append(metrics, metric)
|
||||||
|
@ -319,6 +331,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
||||||
ch <- e.cpuSystem
|
ch <- e.cpuSystem
|
||||||
ch <- e.cpuTotal
|
ch <- e.cpuTotal
|
||||||
ch <- e.cpus
|
ch <- e.cpus
|
||||||
|
ch <- e.cpu_info
|
||||||
ch <- e.memoryRSS
|
ch <- e.memoryRSS
|
||||||
ch <- e.memoryCache
|
ch <- e.memoryCache
|
||||||
ch <- e.memoryUsed
|
ch <- e.memoryUsed
|
||||||
|
@ -339,6 +352,7 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
||||||
ch <- prometheus.MustNewConstMetric(e.cpuSystem, prometheus.GaugeValue, m.cpuSystem, m.name)
|
ch <- prometheus.MustNewConstMetric(e.cpuSystem, prometheus.GaugeValue, m.cpuSystem, m.name)
|
||||||
ch <- prometheus.MustNewConstMetric(e.cpuTotal, prometheus.GaugeValue, m.cpuTotal, m.name)
|
ch <- prometheus.MustNewConstMetric(e.cpuTotal, prometheus.GaugeValue, m.cpuTotal, m.name)
|
||||||
ch <- prometheus.MustNewConstMetric(e.cpus, prometheus.GaugeValue, float64(m.cpus), m.name)
|
ch <- prometheus.MustNewConstMetric(e.cpus, prometheus.GaugeValue, float64(m.cpus), m.name)
|
||||||
|
ch <- prometheus.MustNewConstMetric(e.cpu_info, prometheus.GaugeValue, 1, m.name, m.cpu_list)
|
||||||
ch <- prometheus.MustNewConstMetric(e.memoryRSS, prometheus.GaugeValue, m.memoryRSS, m.name)
|
ch <- prometheus.MustNewConstMetric(e.memoryRSS, prometheus.GaugeValue, m.memoryRSS, m.name)
|
||||||
ch <- prometheus.MustNewConstMetric(e.memoryCache, prometheus.GaugeValue, m.memoryCache, m.name)
|
ch <- prometheus.MustNewConstMetric(e.memoryCache, prometheus.GaugeValue, m.memoryCache, m.name)
|
||||||
ch <- prometheus.MustNewConstMetric(e.memoryUsed, prometheus.GaugeValue, m.memoryUsed, m.name)
|
ch <- prometheus.MustNewConstMetric(e.memoryUsed, prometheus.GaugeValue, m.memoryUsed, m.name)
|
||||||
|
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"reflect"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
@ -53,20 +54,23 @@ func TestMain(m *testing.M) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseCpuSet(t *testing.T) {
|
func TestParseCpuSet(t *testing.T) {
|
||||||
|
expected := []string{"0", "1", "2"}
|
||||||
if cpus, err := parseCpuSet("0-2"); err != nil {
|
if cpus, err := parseCpuSet("0-2"); err != nil {
|
||||||
t.Errorf("Unexpected error: %s", err.Error())
|
t.Errorf("Unexpected error: %s", err.Error())
|
||||||
} else if cpus != 3 {
|
} else if !reflect.DeepEqual(cpus, expected) {
|
||||||
t.Errorf("Unexpected cpus, expected 3 got %d", cpus)
|
t.Errorf("Unexpected cpus, expected %v got %v", expected, cpus)
|
||||||
}
|
}
|
||||||
|
expected = []string{"0", "1", "4", "5", "8", "9"}
|
||||||
if cpus, err := parseCpuSet("0-1,4-5,8-9"); err != nil {
|
if cpus, err := parseCpuSet("0-1,4-5,8-9"); err != nil {
|
||||||
t.Errorf("Unexpected error: %s", err.Error())
|
t.Errorf("Unexpected error: %s", err.Error())
|
||||||
} else if cpus != 6 {
|
} else if !reflect.DeepEqual(cpus, expected) {
|
||||||
t.Errorf("Unexpected cpus, expected 6 got %d", cpus)
|
t.Errorf("Unexpected cpus, expected %v got %v", expected, cpus)
|
||||||
}
|
}
|
||||||
|
expected = []string{"1", "3", "5", "7"}
|
||||||
if cpus, err := parseCpuSet("1,3,5,7"); err != nil {
|
if cpus, err := parseCpuSet("1,3,5,7"); err != nil {
|
||||||
t.Errorf("Unexpected error: %s", err.Error())
|
t.Errorf("Unexpected error: %s", err.Error())
|
||||||
} else if cpus != 4 {
|
} else if !reflect.DeepEqual(cpus, expected) {
|
||||||
t.Errorf("Unexpected cpus, expected 4 got %d", cpus)
|
t.Errorf("Unexpected cpus, expected %v got %v", expected, cpus)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue