// Copyright 2020 Trey Dockendorf
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"fmt"
"net/http"
"os"
2020-02-12 17:33:18 +00:00
"os/user"
2020-02-12 16:07:45 +00:00
"path/filepath"
2020-10-02 17:53:15 +00:00
"reflect"
2020-02-12 17:33:18 +00:00
"regexp"
2020-02-12 16:07:45 +00:00
"strconv"
"strings"
2020-10-02 20:24:03 +00:00
"sync"
2020-02-12 16:07:45 +00:00
"github.com/containerd/cgroups"
2020-10-02 19:21:18 +00:00
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
2020-02-12 16:07:45 +00:00
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
2020-10-02 19:21:18 +00:00
"github.com/prometheus/common/promlog"
"github.com/prometheus/common/promlog/flag"
2020-02-12 16:07:45 +00:00
"github.com/prometheus/common/version"
2020-10-02 17:53:15 +00:00
"github.com/prometheus/procfs"
2020-02-12 16:07:45 +00:00
"gopkg.in/alecthomas/kingpin.v2"
)
// namespace is the first component of every metric name built by NewExporter.
const namespace = "cgroup"
var (
defCgroupRoot = "/sys/fs/cgroup"
2020-10-02 17:53:15 +00:00
defProcRoot = "/proc"
2020-02-12 17:58:18 +00:00
configPaths = kingpin . Flag ( "config.paths" , "Comma separated list of cgroup paths to check, eg /user.slice,/system.slice,/slurm" ) . Required ( ) . String ( )
2020-02-13 16:59:25 +00:00
listenAddress = kingpin . Flag ( "web.listen-address" , "Address to listen on for web interface and telemetry." ) . Default ( ":9306" ) . String ( )
2020-02-12 16:07:45 +00:00
disableExporterMetrics = kingpin . Flag ( "web.disable-exporter-metrics" , "Exclude metrics about the exporter (promhttp_*, process_*, go_*)" ) . Default ( "false" ) . Bool ( )
cgroupRoot = kingpin . Flag ( "path.cgroup.root" , "Root path to cgroup fs" ) . Default ( defCgroupRoot ) . String ( )
2020-10-02 17:53:15 +00:00
procRoot = kingpin . Flag ( "path.proc.root" , "Root path to proc fs" ) . Default ( defProcRoot ) . String ( )
collectProc = kingpin . Flag ( "collect.proc" , "Boolean that sets if to collect proc information" ) . Default ( "false" ) . Bool ( )
collectProcMaxExec = kingpin . Flag ( "collect.proc.max-exec" , "Max length of process executable to record" ) . Default ( "100" ) . Int ( )
2020-10-02 20:24:03 +00:00
metricLock = sync . RWMutex { }
2020-02-12 16:07:45 +00:00
)
// CgroupMetric holds the values gathered for a single cgroup during one scrape.
type CgroupMetric struct {
	// name is the cgroup path; it becomes the "cgroup" label value.
	name string

	// CPU accounting, converted to seconds, plus the cpuset of the cgroup.
	cpuUser   float64
	cpuSystem float64
	cpuTotal  float64
	cpus      int    // number of CPUs listed in the cpuset
	cpu_list  string // comma separated CPU ids from the cpuset

	// Memory accounting in bytes, plus the memory fail counter.
	memoryRSS       float64
	memoryCache     float64
	memoryUsed      float64
	memoryTotal     float64 // memory limit of the cgroup
	memoryFailCount float64

	// Swap accounting taken from the swap entry of the memory stats.
	memswUsed      float64
	memswTotal     float64
	memswFailCount float64

	// Identity derived from the cgroup path by getInfo.
	userslice bool // path is a user-<uid>.slice cgroup
	job       bool // path belongs to a SLURM or Torque job
	uid       string
	username  string
	jobid     string

	// processExec counts the processes of the cgroup per executable path.
	processExec map[string]float64

	// err is set when the cgroup could not be loaded.
	err bool
}
type Exporter struct {
2020-02-14 17:08:22 +00:00
paths [ ] string
2020-02-20 18:48:02 +00:00
collectError * prometheus . Desc
2020-02-14 17:08:22 +00:00
cpuUser * prometheus . Desc
cpuSystem * prometheus . Desc
cpuTotal * prometheus . Desc
cpus * prometheus . Desc
2020-10-01 14:50:22 +00:00
cpu_info * prometheus . Desc
2020-04-03 12:54:56 +00:00
memoryRSS * prometheus . Desc
memoryCache * prometheus . Desc
2020-02-14 17:08:22 +00:00
memoryUsed * prometheus . Desc
memoryTotal * prometheus . Desc
memoryFailCount * prometheus . Desc
2020-02-27 16:03:20 +00:00
memswUsed * prometheus . Desc
memswTotal * prometheus . Desc
memswFailCount * prometheus . Desc
2020-02-20 19:13:45 +00:00
info * prometheus . Desc
2020-10-02 17:53:15 +00:00
processExec * prometheus . Desc
2020-10-02 19:21:18 +00:00
logger log . Logger
2020-02-12 16:07:45 +00:00
}
// fileExists reports whether filename exists and is not a directory.
//
// Every os.Stat failure yields false, not only "does not exist". Errors such
// as ENOTDIR or EACCES return a nil FileInfo, so the result must not be
// dereferenced unless the error is nil.
func fileExists(filename string) bool {
	info, err := os.Stat(filename)
	if err != nil {
		return false
	}
	return !info.IsDir()
}
2020-10-02 17:53:15 +00:00
// sliceContains reports whether v is equal (==) to any element of s.
//
// s is inspected through reflection and may be a slice, an array or a string.
// Any other value, including an untyped nil, contains nothing and yields false
// instead of panicking inside the reflect package.
func sliceContains(s interface{}, v interface{}) bool {
	slice := reflect.ValueOf(s)
	switch slice.Kind() {
	case reflect.Slice, reflect.Array, reflect.String:
		// Indexable kinds: safe to call Len and Index below.
	default:
		return false
	}
	for i := 0; i < slice.Len(); i++ {
		if slice.Index(i).Interface() == v {
			return true
		}
	}
	return false
}
// subsystem returns the cgroup subsystems this exporter reads: cpuacct and
// memory, both rooted at the configured cgroup root. It is passed to
// cgroups.Load as the hierarchy. The returned error is always nil.
func subsystem() ([]cgroups.Subsystem, error) {
	return []cgroups.Subsystem{
		cgroups.NewCpuacct(*cgroupRoot),
		cgroups.NewMemory(*cgroupRoot),
	}, nil
}
2020-10-02 19:21:18 +00:00
func getCPUs ( name string , logger log . Logger ) ( [ ] string , error ) {
2020-02-12 17:40:24 +00:00
cpusPath := fmt . Sprintf ( "%s/cpuset%s/cpuset.cpus" , * cgroupRoot , name )
if ! fileExists ( cpusPath ) {
2020-10-01 14:50:22 +00:00
return nil , nil
2020-02-12 17:40:24 +00:00
}
2021-04-23 13:52:06 +00:00
cpusData , err := os . ReadFile ( cpusPath )
2020-02-12 17:40:24 +00:00
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( logger ) . Log ( "msg" , "Error reading cpuset" , "cpuset" , cpusPath , "err" , err )
2020-10-01 14:50:22 +00:00
return nil , err
2020-02-12 17:40:24 +00:00
}
2020-02-13 15:42:16 +00:00
cpus , err := parseCpuSet ( strings . TrimSuffix ( string ( cpusData ) , "\n" ) )
2020-02-12 17:40:24 +00:00
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( logger ) . Log ( "msg" , "Error parsing cpu set" , "cpuset" , cpusPath , "err" , err )
2020-10-01 14:50:22 +00:00
return nil , err
2020-02-12 17:40:24 +00:00
}
return cpus , nil
}
2020-10-01 14:50:22 +00:00
// parseCpuSet expands a cpuset list such as "0-2,4" into the individual CPU
// ids {"0", "1", "2", "4"}.
//
// The list is a comma separated sequence of single ids ("4") or inclusive
// ranges ("0-2"). An empty string yields (nil, nil). A non-numeric bound or an
// element with more than one '-' yields an error; such an element must not
// fall through and reuse the bounds of the previous element.
func parseCpuSet(cpuset string) ([]string, error) {
	if cpuset == "" {
		return nil, nil
	}
	var cpus []string
	for _, r := range strings.Split(cpuset, ",") {
		var start, end int
		var err error
		boundaries := strings.Split(r, "-")
		switch len(boundaries) {
		case 1:
			start, err = strconv.Atoi(boundaries[0])
			if err != nil {
				return nil, err
			}
			end = start
		case 2:
			start, err = strconv.Atoi(boundaries[0])
			if err != nil {
				return nil, err
			}
			end, err = strconv.Atoi(boundaries[1])
			if err != nil {
				return nil, err
			}
		default:
			return nil, fmt.Errorf("invalid cpuset range %q", r)
		}
		for e := start; e <= end; e++ {
			cpus = append(cpus, strconv.Itoa(e))
		}
	}
	return cpus, nil
}
2020-10-02 19:21:18 +00:00
func getInfo ( name string , metric * CgroupMetric , logger log . Logger ) {
2020-02-13 15:42:16 +00:00
pathBase := filepath . Base ( name )
userSlicePattern := regexp . MustCompile ( "^user-([0-9]+).slice$" )
userSliceMatch := userSlicePattern . FindStringSubmatch ( pathBase )
if len ( userSliceMatch ) == 2 {
metric . userslice = true
metric . uid = userSliceMatch [ 1 ]
user , err := user . LookupId ( metric . uid )
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( logger ) . Log ( "msg" , "Error looking up user slice uid" , "uid" , metric . uid , "err" , err )
2020-02-13 15:42:16 +00:00
} else {
metric . username = user . Username
}
return
}
slurmPattern := regexp . MustCompile ( "^/slurm/uid_([0-9]+)/job_([0-9]+)$" )
slurmMatch := slurmPattern . FindStringSubmatch ( name )
if len ( slurmMatch ) == 3 {
metric . job = true
metric . uid = slurmMatch [ 1 ]
metric . jobid = slurmMatch [ 2 ]
user , err := user . LookupId ( metric . uid )
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( logger ) . Log ( "msg" , "Error looking up slurm uid" , "uid" , metric . uid , "err" , err )
2020-02-13 15:42:16 +00:00
} else {
metric . username = user . Username
}
2020-02-13 15:52:43 +00:00
return
}
if strings . HasPrefix ( name , "/torque" ) {
metric . job = true
pathBaseSplit := strings . Split ( pathBase , "." )
metric . jobid = pathBaseSplit [ 0 ]
return
2020-02-13 15:42:16 +00:00
}
}
2020-10-02 19:21:18 +00:00
func getProcInfo ( pids [ ] int , metric * CgroupMetric , logger log . Logger ) {
2020-10-02 17:53:15 +00:00
executables := make ( map [ string ] float64 )
procFS , err := procfs . NewFS ( * procRoot )
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( logger ) . Log ( "msg" , "Unable to open procfs" , "path" , * procRoot )
2020-10-02 17:53:15 +00:00
return
}
2020-10-02 20:24:03 +00:00
wg := & sync . WaitGroup { }
wg . Add ( len ( pids ) )
2020-10-02 17:53:15 +00:00
for _ , pid := range pids {
2020-10-02 20:24:03 +00:00
go func ( p int ) {
proc , err := procFS . Proc ( p )
if err != nil {
level . Error ( logger ) . Log ( "msg" , "Unable to read PID" , "pid" , p )
wg . Done ( )
return
}
executable , err := proc . Executable ( )
if err != nil {
level . Error ( logger ) . Log ( "msg" , "Unable to get executable for PID" , "pid" , p )
wg . Done ( )
return
}
if len ( executable ) > * collectProcMaxExec {
level . Debug ( logger ) . Log ( "msg" , "Executable will be truncated" , "executable" , executable , "len" , len ( executable ) , "pid" , p )
executable = executable [ len ( executable ) - * collectProcMaxExec : ]
executable = fmt . Sprintf ( "...%s" , executable )
}
metricLock . Lock ( )
executables [ executable ] += 1
metricLock . Unlock ( )
wg . Done ( )
} ( pid )
2020-10-02 17:53:15 +00:00
}
2020-10-02 20:24:03 +00:00
wg . Wait ( )
2020-10-02 17:53:15 +00:00
metric . processExec = executables
}
2020-10-02 19:21:18 +00:00
func getName ( p cgroups . Process , path string , logger log . Logger ) ( string , error ) {
2020-02-13 15:42:16 +00:00
cpuacctPath := filepath . Join ( * cgroupRoot , "cpuacct" )
name := strings . TrimPrefix ( p . Path , cpuacctPath )
name = strings . TrimSuffix ( name , "/" )
dirs := strings . Split ( name , "/" )
2020-10-02 19:21:18 +00:00
level . Debug ( logger ) . Log ( "msg" , "cgroup name" , "dirs" , fmt . Sprintf ( "%v" , dirs ) )
2020-02-13 15:42:16 +00:00
// Handle user.slice, system.slice and torque
if len ( dirs ) == 3 {
return name , nil
}
// Handle deeper cgroup where we want higher level, mainly SLURM
var keepDirs [ ] string
for i , d := range dirs {
if strings . HasPrefix ( d , "job_" ) {
keepDirs = dirs [ 0 : i + 1 ]
break
}
}
if keepDirs == nil {
return name , nil
}
name = strings . Join ( keepDirs , "/" )
return name , nil
}
2020-10-02 19:21:18 +00:00
func NewExporter ( paths [ ] string , logger log . Logger ) * Exporter {
2020-02-12 16:07:45 +00:00
return & Exporter {
paths : paths ,
cpuUser : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "cpu" , "user_seconds" ) ,
"Cumalitive CPU user seconds for cgroup" , [ ] string { "cgroup" } , nil ) ,
2020-02-20 19:31:49 +00:00
cpuSystem : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "cpu" , "system_seconds" ) ,
"Cumalitive CPU system seconds for cgroup" , [ ] string { "cgroup" } , nil ) ,
2020-02-12 16:07:45 +00:00
cpuTotal : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "cpu" , "total_seconds" ) ,
"Cumalitive CPU total seconds for cgroup" , [ ] string { "cgroup" } , nil ) ,
cpus : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "" , "cpus" ) ,
"Number of CPUs in the cgroup" , [ ] string { "cgroup" } , nil ) ,
2020-10-01 14:50:22 +00:00
cpu_info : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "" , "cpu_info" ) ,
"Information about the cgroup CPUs" , [ ] string { "cgroup" , "cpus" } , nil ) ,
2020-04-03 12:54:56 +00:00
memoryRSS : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memory" , "rss_bytes" ) ,
"Memory RSS used in bytes" , [ ] string { "cgroup" } , nil ) ,
memoryCache : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memory" , "cache_bytes" ) ,
"Memory cache used in bytes" , [ ] string { "cgroup" } , nil ) ,
2020-02-12 16:07:45 +00:00
memoryUsed : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memory" , "used_bytes" ) ,
"Memory used in bytes" , [ ] string { "cgroup" } , nil ) ,
memoryTotal : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memory" , "total_bytes" ) ,
"Memory total given to cgroup in bytes" , [ ] string { "cgroup" } , nil ) ,
2020-02-14 17:08:22 +00:00
memoryFailCount : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memory" , "fail_count" ) ,
"Memory fail count" , [ ] string { "cgroup" } , nil ) ,
2020-02-27 16:03:20 +00:00
memswUsed : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memsw" , "used_bytes" ) ,
2020-02-12 17:49:06 +00:00
"Swap used in bytes" , [ ] string { "cgroup" } , nil ) ,
2020-02-27 16:03:20 +00:00
memswTotal : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memsw" , "total_bytes" ) ,
2020-02-12 17:49:06 +00:00
"Swap total given to cgroup in bytes" , [ ] string { "cgroup" } , nil ) ,
2020-02-27 16:03:20 +00:00
memswFailCount : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "memsw" , "fail_count" ) ,
2020-02-14 17:08:22 +00:00
"Swap fail count" , [ ] string { "cgroup" } , nil ) ,
2020-02-20 19:13:45 +00:00
info : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "" , "info" ) ,
2020-02-13 15:42:16 +00:00
"User slice information" , [ ] string { "cgroup" , "username" , "uid" , "jobid" } , nil ) ,
2020-10-02 17:53:15 +00:00
processExec : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "" , "process_exec_count" ) ,
"Count of instances of a given process" , [ ] string { "cgroup" , "exec" } , nil ) ,
2020-02-20 19:13:45 +00:00
collectError : prometheus . NewDesc ( prometheus . BuildFQName ( namespace , "exporter" , "collect_error" ) ,
2020-02-20 20:12:32 +00:00
"Indicates collection error, 0=no error, 1=error" , [ ] string { "cgroup" } , nil ) ,
2020-10-02 19:21:18 +00:00
logger : logger ,
2020-02-12 16:07:45 +00:00
}
}
2020-10-02 20:24:03 +00:00
// getMetrics loads the cgroup called name and returns its CPU, memory, swap,
// cpuset and identity values.
//
// pids maps cgroup names to their process ids; it is only consulted when the
// collect.proc flag is set. If the cgroup cannot be loaded or its statistics
// cannot be read, the failure is logged and the returned metric has err set.
// The metric is returned alongside the error because the caller records it
// either way.
func (e *Exporter) getMetrics(name string, pids map[string][]int) (CgroupMetric, error) {
	// Usage counters are divided by this to convert nanoseconds to seconds.
	const nanosecondsPerSecond = 1000000000.0

	metric := CgroupMetric{name: name}
	level.Debug(e.logger).Log("msg", "Loading cgroup", "path", name)
	ctrl, err := cgroups.Load(subsystem, func(subsystem cgroups.Name) (string, error) {
		return name, nil
	})
	if err != nil {
		level.Error(e.logger).Log("msg", "Failed to load cgroups", "path", name, "err", err)
		metric.err = true
		return metric, err
	}

	stats, err := ctrl.Stat(cgroups.IgnoreNotExist)
	if err == nil && stats == nil {
		err = fmt.Errorf("no stats returned for cgroup %s", name)
	}
	if err != nil {
		level.Error(e.logger).Log("msg", "Failed to stat cgroups", "path", name, "err", err)
		metric.err = true
		return metric, err
	}

	// This runs inside a goroutine, where a nil dereference would crash the
	// whole process. Sections of the stats may be missing when a subsystem
	// could not be read, so each one is checked before use.
	if stats.CPU != nil && stats.CPU.Usage != nil {
		usage := stats.CPU.Usage
		metric.cpuUser = float64(usage.User) / nanosecondsPerSecond
		metric.cpuSystem = float64(usage.Kernel) / nanosecondsPerSecond
		metric.cpuTotal = float64(usage.Total) / nanosecondsPerSecond
	}
	if mem := stats.Memory; mem != nil {
		metric.memoryRSS = float64(mem.TotalRSS)
		metric.memoryCache = float64(mem.TotalCache)
		if mem.Usage != nil {
			metric.memoryUsed = float64(mem.Usage.Usage)
			metric.memoryTotal = float64(mem.Usage.Limit)
			metric.memoryFailCount = float64(mem.Usage.Failcnt)
		}
		if mem.Swap != nil {
			metric.memswUsed = float64(mem.Swap.Usage)
			metric.memswTotal = float64(mem.Swap.Limit)
			metric.memswFailCount = float64(mem.Swap.Failcnt)
		}
	}

	// getCPUs logs its own failures; the CPU fields then keep their zero values.
	if cpus, err := getCPUs(name, e.logger); err == nil {
		metric.cpus = len(cpus)
		metric.cpu_list = strings.Join(cpus, ",")
	}
	getInfo(name, &metric, e.logger)

	if *collectProc {
		if val, ok := pids[name]; ok {
			level.Debug(e.logger).Log("msg", "Get process info", "pids", fmt.Sprintf("%v", val))
			getProcInfo(val, &metric, e.logger)
		} else {
			level.Error(e.logger).Log("msg", "Unable to get PIDs", "path", name)
		}
	}
	return metric, nil
}
2020-02-20 20:12:32 +00:00
func ( e * Exporter ) collect ( ) ( [ ] CgroupMetric , error ) {
2020-02-12 16:07:45 +00:00
var names [ ] string
var metrics [ ] CgroupMetric
for _ , path := range e . paths {
2020-10-02 19:21:18 +00:00
level . Debug ( e . logger ) . Log ( "msg" , "Loading cgroup" , "path" , path )
2020-02-12 16:07:45 +00:00
control , err := cgroups . Load ( subsystem , cgroups . StaticPath ( path ) )
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( e . logger ) . Log ( "msg" , "Error loading cgroup subsystem" , "path" , path , "err" , err )
2020-02-20 20:12:32 +00:00
metric := CgroupMetric { name : path , err : true }
metrics = append ( metrics , metric )
2020-02-20 18:48:02 +00:00
continue
2020-02-12 16:07:45 +00:00
}
processes , err := control . Processes ( cgroups . Cpuacct , true )
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( e . logger ) . Log ( "msg" , "Error loading cgroup processes" , "path" , path , "err" , err )
2020-02-20 20:12:32 +00:00
metric := CgroupMetric { name : path , err : true }
metrics = append ( metrics , metric )
2020-02-20 18:48:02 +00:00
continue
2020-02-12 16:07:45 +00:00
}
2020-10-02 19:21:18 +00:00
level . Debug ( e . logger ) . Log ( "msg" , "Found processes" , "processes" , len ( processes ) )
2020-10-02 17:53:15 +00:00
pids := make ( map [ string ] [ ] int )
2020-02-12 16:07:45 +00:00
for _ , p := range processes {
2020-10-02 19:21:18 +00:00
level . Debug ( e . logger ) . Log ( "msg" , "Get Name" , "process" , p . Path , "pid" , p . Pid , "path" , path )
name , err := getName ( p , path , e . logger )
2020-02-13 15:42:16 +00:00
if err != nil {
2020-10-02 19:21:18 +00:00
level . Error ( e . logger ) . Log ( "msg" , "Error getting cgroup name for process" , "process" , p . Path , "path" , path , "err" , err )
2020-02-13 15:42:16 +00:00
continue
}
2020-10-02 17:53:15 +00:00
if ! sliceContains ( names , name ) {
names = append ( names , name )
}
if val , ok := pids [ name ] ; ok {
if ! sliceContains ( val , p . Pid ) {
val = append ( val , p . Pid )
}
pids [ name ] = val
} else {
pids [ name ] = [ ] int { p . Pid }
2020-02-12 16:07:45 +00:00
}
2020-10-02 17:53:15 +00:00
}
2020-10-02 20:24:03 +00:00
wg := & sync . WaitGroup { }
wg . Add ( len ( names ) )
2020-10-02 17:53:15 +00:00
for _ , name := range names {
2020-10-02 20:24:03 +00:00
go func ( n string , p map [ string ] [ ] int ) {
metric , _ := e . getMetrics ( n , p )
metricLock . Lock ( )
2020-02-20 20:12:32 +00:00
metrics = append ( metrics , metric )
2020-10-02 20:24:03 +00:00
metricLock . Unlock ( )
wg . Done ( )
} ( name , pids )
2020-02-12 16:07:45 +00:00
}
2020-10-02 20:24:03 +00:00
wg . Wait ( )
2020-02-12 16:07:45 +00:00
}
return metrics , nil
}
func ( e * Exporter ) Describe ( ch chan <- * prometheus . Desc ) {
ch <- e . cpuUser
ch <- e . cpuSystem
ch <- e . cpuTotal
ch <- e . cpus
2020-10-01 14:50:22 +00:00
ch <- e . cpu_info
2020-04-03 12:54:56 +00:00
ch <- e . memoryRSS
ch <- e . memoryCache
2020-02-12 16:07:45 +00:00
ch <- e . memoryUsed
ch <- e . memoryTotal
2020-02-14 17:08:22 +00:00
ch <- e . memoryFailCount
2020-02-27 16:03:20 +00:00
ch <- e . memswUsed
ch <- e . memswTotal
ch <- e . memswFailCount
2020-10-02 17:53:15 +00:00
ch <- e . info
if * collectProc {
ch <- e . processExec
}
2020-02-12 16:07:45 +00:00
}
func ( e * Exporter ) Collect ( ch chan <- prometheus . Metric ) {
2020-02-20 20:12:32 +00:00
metrics , _ := e . collect ( )
2020-02-12 16:07:45 +00:00
for _ , m := range metrics {
2020-02-20 20:12:32 +00:00
if m . err {
ch <- prometheus . MustNewConstMetric ( e . collectError , prometheus . GaugeValue , 1 , m . name )
}
2020-02-12 16:07:45 +00:00
ch <- prometheus . MustNewConstMetric ( e . cpuUser , prometheus . GaugeValue , m . cpuUser , m . name )
ch <- prometheus . MustNewConstMetric ( e . cpuSystem , prometheus . GaugeValue , m . cpuSystem , m . name )
ch <- prometheus . MustNewConstMetric ( e . cpuTotal , prometheus . GaugeValue , m . cpuTotal , m . name )
ch <- prometheus . MustNewConstMetric ( e . cpus , prometheus . GaugeValue , float64 ( m . cpus ) , m . name )
2020-10-01 14:50:22 +00:00
ch <- prometheus . MustNewConstMetric ( e . cpu_info , prometheus . GaugeValue , 1 , m . name , m . cpu_list )
2020-04-03 12:54:56 +00:00
ch <- prometheus . MustNewConstMetric ( e . memoryRSS , prometheus . GaugeValue , m . memoryRSS , m . name )
ch <- prometheus . MustNewConstMetric ( e . memoryCache , prometheus . GaugeValue , m . memoryCache , m . name )
2020-02-12 16:07:45 +00:00
ch <- prometheus . MustNewConstMetric ( e . memoryUsed , prometheus . GaugeValue , m . memoryUsed , m . name )
ch <- prometheus . MustNewConstMetric ( e . memoryTotal , prometheus . GaugeValue , m . memoryTotal , m . name )
2020-02-14 17:08:22 +00:00
ch <- prometheus . MustNewConstMetric ( e . memoryFailCount , prometheus . GaugeValue , m . memoryFailCount , m . name )
2020-02-27 16:03:20 +00:00
ch <- prometheus . MustNewConstMetric ( e . memswUsed , prometheus . GaugeValue , m . memswUsed , m . name )
ch <- prometheus . MustNewConstMetric ( e . memswTotal , prometheus . GaugeValue , m . memswTotal , m . name )
ch <- prometheus . MustNewConstMetric ( e . memswFailCount , prometheus . GaugeValue , m . memswFailCount , m . name )
2020-02-20 19:13:45 +00:00
if m . userslice || m . job {
ch <- prometheus . MustNewConstMetric ( e . info , prometheus . GaugeValue , 1 , m . name , m . username , m . uid , m . jobid )
2020-02-13 15:42:16 +00:00
}
2020-10-02 17:53:15 +00:00
if * collectProc {
for exec , count := range m . processExec {
ch <- prometheus . MustNewConstMetric ( e . processExec , prometheus . GaugeValue , count , m . name , exec )
}
}
2020-02-12 16:07:45 +00:00
}
}
2020-10-02 19:21:18 +00:00
func metricsHandler ( logger log . Logger ) http . HandlerFunc {
2020-02-12 16:07:45 +00:00
return func ( w http . ResponseWriter , r * http . Request ) {
registry := prometheus . NewRegistry ( )
paths := strings . Split ( * configPaths , "," )
2020-10-02 19:21:18 +00:00
exporter := NewExporter ( paths , logger )
2020-02-12 16:07:45 +00:00
registry . MustRegister ( exporter )
gatherers := prometheus . Gatherers { registry }
if ! * disableExporterMetrics {
gatherers = append ( gatherers , prometheus . DefaultGatherer )
}
// Delegate http serving to Prometheus client library, which will call collector.Collect.
h := promhttp . HandlerFor ( gatherers , promhttp . HandlerOpts { } )
h . ServeHTTP ( w , r )
}
}
func main ( ) {
metricsEndpoint := "/metrics"
2020-10-02 19:21:18 +00:00
promlogConfig := & promlog . Config { }
flag . AddFlags ( kingpin . CommandLine , promlogConfig )
2020-02-12 16:07:45 +00:00
kingpin . Version ( version . Print ( "cgroup_exporter" ) )
kingpin . HelpFlag . Short ( 'h' )
kingpin . Parse ( )
2020-10-02 19:21:18 +00:00
logger := promlog . New ( promlogConfig )
level . Info ( logger ) . Log ( "msg" , "Starting cgroup_exporter" , "version" , version . Info ( ) )
level . Info ( logger ) . Log ( "msg" , "Build context" , "build_context" , version . BuildContext ( ) )
level . Info ( logger ) . Log ( "msg" , "Starting Server" , "address" , * listenAddress )
2020-02-12 16:07:45 +00:00
http . HandleFunc ( "/" , func ( w http . ResponseWriter , r * http . Request ) {
//nolint:errcheck
w . Write ( [ ] byte ( ` < html >
< head > < title > cgroup Exporter < / title > < / head >
< body >
< h1 > cgroup Exporter < / h1 >
< p > < a href = ' ` + metricsEndpoint + ` ' > Metrics < / a > < / p >
< / body >
< / html > ` ) )
} )
2020-10-02 19:21:18 +00:00
http . Handle ( metricsEndpoint , metricsHandler ( logger ) )
err := http . ListenAndServe ( * listenAddress , nil )
if err != nil {
level . Error ( logger ) . Log ( "err" , err )
os . Exit ( 1 )
}
2020-02-12 16:07:45 +00:00
}