commit
cd0c8008d8
|
@ -50,6 +50,7 @@ cgroup_cpu_system_seconds{cgroup="/user.slice/user-20821.slice"} 1.96
|
|||
cgroup_cpu_total_seconds{cgroup="/user.slice/user-20821.slice"} 3.817500568
|
||||
cgroup_cpu_user_seconds{cgroup="/user.slice/user-20821.slice"} 1.61
|
||||
cgroup_cpus{cgroup="/user.slice/user-20821.slice"} 0
|
||||
cgroup_cpu_info{cgroup="/user.slice/user-20821.slice",cpus=""} 1
|
||||
cgroup_info{cgroup="/user.slice/user-20821.slice",jobid="",uid="20821",username="tdockendorf"} 1
|
||||
cgroup_memory_cache_bytes{cgroup="/user.slice/user-20821.slice"} 2.322432e+06
|
||||
cgroup_memory_fail_count{cgroup="/user.slice/user-20821.slice"} 0
|
||||
|
@ -68,6 +69,7 @@ cgroup_cpu_system_seconds{cgroup="/slurm/uid_20821/job_12"} 0
|
|||
cgroup_cpu_total_seconds{cgroup="/slurm/uid_20821/job_12"} 0.007840451
|
||||
cgroup_cpu_user_seconds{cgroup="/slurm/uid_20821/job_12"} 0
|
||||
cgroup_cpus{cgroup="/slurm/uid_20821/job_12"} 2
|
||||
cgroup_cpu_info{cgroup="/slurm/uid_20821/job_12",cpus="0,1"} 1
|
||||
cgroup_info{cgroup="/slurm/uid_20821/job_12",jobid="12",uid="20821",username="tdockendorf"} 1
|
||||
cgroup_memory_cache_bytes{cgroup="/slurm/uid_20821/job_12"} 4.096e+03
|
||||
cgroup_memory_fail_count{cgroup="/slurm/uid_20821/job_12"} 0
|
||||
|
@ -85,7 +87,8 @@ Example of metrics exposed by this exporter when looking at `/torque` paths:
|
|||
cgroup_cpu_system_seconds{cgroup="/torque/1182958.batch.example.com"} 26.35
|
||||
cgroup_cpu_total_seconds{cgroup="/torque/1182958.batch.example.com"} 939.568245515
|
||||
cgroup_cpu_user_seconds{cgroup="/torque/1182958.batch.example.com"} 915.61
|
||||
cgroup_cpus{cgroup="/torque/1182958.batch.example.com"} 40
|
||||
cgroup_cpus{cgroup="/torque/1182958.batch.example.com"} 8
|
||||
cgroup_cpu_info{cgroup="/torque/1182958.batch.example.com",cpus="0,1,2,3,4,5,6,7"} 1
|
||||
cgroup_info{cgroup="/torque/1182958.batch.example.com",jobid="1182958",uid="",username=""} 1
|
||||
cgroup_memory_cache_bytes{cgroup="/torque/1182958.batch.example.com"} 1.09678592e+08
|
||||
cgroup_memory_fail_count{cgroup="/torque/1182958.batch.example.com"} 0
|
||||
|
|
|
@ -50,6 +50,7 @@ type CgroupMetric struct {
|
|||
cpuSystem float64
|
||||
cpuTotal float64
|
||||
cpus int
|
||||
cpu_list string
|
||||
memoryRSS float64
|
||||
memoryCache float64
|
||||
memoryUsed float64
|
||||
|
@ -73,6 +74,7 @@ type Exporter struct {
|
|||
cpuSystem *prometheus.Desc
|
||||
cpuTotal *prometheus.Desc
|
||||
cpus *prometheus.Desc
|
||||
cpu_info *prometheus.Desc
|
||||
memoryRSS *prometheus.Desc
|
||||
memoryCache *prometheus.Desc
|
||||
memoryUsed *prometheus.Desc
|
||||
|
@ -109,46 +111,53 @@ func subsystem() ([]cgroups.Subsystem, error) {
|
|||
return s, nil
|
||||
}
|
||||
|
||||
func getCPUs(name string) (int, error) {
|
||||
func getCPUs(name string) ([]string, error) {
|
||||
cpusPath := fmt.Sprintf("%s/cpuset%s/cpuset.cpus", *cgroupRoot, name)
|
||||
if !fileExists(cpusPath) {
|
||||
return 0, nil
|
||||
return nil, nil
|
||||
}
|
||||
cpusData, err := ioutil.ReadFile(cpusPath)
|
||||
if err != nil {
|
||||
log.Errorf("Error reading %s: %s", cpusPath, err.Error())
|
||||
return 0, err
|
||||
return nil, err
|
||||
}
|
||||
cpus, err := parseCpuSet(strings.TrimSuffix(string(cpusData), "\n"))
|
||||
if err != nil {
|
||||
log.Errorf("Error parsing cpu set %s", err.Error())
|
||||
return 0, err
|
||||
return nil, err
|
||||
}
|
||||
return cpus, nil
|
||||
}
|
||||
|
||||
func parseCpuSet(cpuset string) (int, error) {
|
||||
var cpus int
|
||||
func parseCpuSet(cpuset string) ([]string, error) {
|
||||
var cpus []string
|
||||
var start, end int
|
||||
var err error
|
||||
if cpuset == "" {
|
||||
return 0, nil
|
||||
return nil, nil
|
||||
}
|
||||
ranges := strings.Split(cpuset, ",")
|
||||
for _, r := range ranges {
|
||||
boundaries := strings.Split(r, "-")
|
||||
if len(boundaries) == 1 {
|
||||
cpus++
|
||||
} else if len(boundaries) == 2 {
|
||||
start, err := strconv.Atoi(boundaries[0])
|
||||
start, err = strconv.Atoi(boundaries[0])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return nil, err
|
||||
}
|
||||
end, err := strconv.Atoi(boundaries[1])
|
||||
end = start
|
||||
} else if len(boundaries) == 2 {
|
||||
start, err = strconv.Atoi(boundaries[0])
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return nil, err
|
||||
}
|
||||
end, err = strconv.Atoi(boundaries[1])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
for e := start; e <= end; e++ {
|
||||
cpus++
|
||||
}
|
||||
cpu := strconv.Itoa(e)
|
||||
cpus = append(cpus, cpu)
|
||||
}
|
||||
}
|
||||
return cpus, nil
|
||||
|
@ -227,6 +236,8 @@ func NewExporter(paths []string) *Exporter {
|
|||
"Cumalitive CPU total seconds for cgroup", []string{"cgroup"}, nil),
|
||||
cpus: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "cpus"),
|
||||
"Number of CPUs in the cgroup", []string{"cgroup"}, nil),
|
||||
cpu_info: prometheus.NewDesc(prometheus.BuildFQName(namespace, "", "cpu_info"),
|
||||
"Information about the cgroup CPUs", []string{"cgroup", "cpus"}, nil),
|
||||
memoryRSS: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "rss_bytes"),
|
||||
"Memory RSS used in bytes", []string{"cgroup"}, nil),
|
||||
memoryCache: prometheus.NewDesc(prometheus.BuildFQName(namespace, "memory", "cache_bytes"),
|
||||
|
@ -304,7 +315,8 @@ func (e *Exporter) collect() ([]CgroupMetric, error) {
|
|||
metric.memswTotal = float64(stats.Memory.Swap.Limit)
|
||||
metric.memswFailCount = float64(stats.Memory.Swap.Failcnt)
|
||||
if cpus, err := getCPUs(name); err == nil {
|
||||
metric.cpus = cpus
|
||||
metric.cpus = len(cpus)
|
||||
metric.cpu_list = strings.Join(cpus, ",")
|
||||
}
|
||||
getInfo(name, &metric)
|
||||
metrics = append(metrics, metric)
|
||||
|
@ -319,6 +331,7 @@ func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
|
|||
ch <- e.cpuSystem
|
||||
ch <- e.cpuTotal
|
||||
ch <- e.cpus
|
||||
ch <- e.cpu_info
|
||||
ch <- e.memoryRSS
|
||||
ch <- e.memoryCache
|
||||
ch <- e.memoryUsed
|
||||
|
@ -339,6 +352,7 @@ func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
|
|||
ch <- prometheus.MustNewConstMetric(e.cpuSystem, prometheus.GaugeValue, m.cpuSystem, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.cpuTotal, prometheus.GaugeValue, m.cpuTotal, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.cpus, prometheus.GaugeValue, float64(m.cpus), m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.cpu_info, prometheus.GaugeValue, 1, m.name, m.cpu_list)
|
||||
ch <- prometheus.MustNewConstMetric(e.memoryRSS, prometheus.GaugeValue, m.memoryRSS, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.memoryCache, prometheus.GaugeValue, m.memoryCache, m.name)
|
||||
ch <- prometheus.MustNewConstMetric(e.memoryUsed, prometheus.GaugeValue, m.memoryUsed, m.name)
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
|
@ -53,20 +54,23 @@ func TestMain(m *testing.M) {
|
|||
}
|
||||
|
||||
func TestParseCpuSet(t *testing.T) {
|
||||
expected := []string{"0", "1", "2"}
|
||||
if cpus, err := parseCpuSet("0-2"); err != nil {
|
||||
t.Errorf("Unexpected error: %s", err.Error())
|
||||
} else if cpus != 3 {
|
||||
t.Errorf("Unexpected cpus, expected 3 got %d", cpus)
|
||||
} else if !reflect.DeepEqual(cpus, expected) {
|
||||
t.Errorf("Unexpected cpus, expected %v got %v", expected, cpus)
|
||||
}
|
||||
expected = []string{"0", "1", "4", "5", "8", "9"}
|
||||
if cpus, err := parseCpuSet("0-1,4-5,8-9"); err != nil {
|
||||
t.Errorf("Unexpected error: %s", err.Error())
|
||||
} else if cpus != 6 {
|
||||
t.Errorf("Unexpected cpus, expected 6 got %d", cpus)
|
||||
} else if !reflect.DeepEqual(cpus, expected) {
|
||||
t.Errorf("Unexpected cpus, expected %v got %v", expected, cpus)
|
||||
}
|
||||
expected = []string{"1", "3", "5", "7"}
|
||||
if cpus, err := parseCpuSet("1,3,5,7"); err != nil {
|
||||
t.Errorf("Unexpected error: %s", err.Error())
|
||||
} else if cpus != 4 {
|
||||
t.Errorf("Unexpected cpus, expected 4 got %d", cpus)
|
||||
} else if !reflect.DeepEqual(cpus, expected) {
|
||||
t.Errorf("Unexpected cpus, expected %v got %v", expected, cpus)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue