check_cpu.pl
changeset 0 a5f5e3961230
child 1 9df9a8615b9f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_cpu.pl	Fri Oct 14 16:57:39 2011 +0200
@@ -0,0 +1,278 @@
+#!/usr/bin/perl -w
+
+use 5.010;
+use strict;
+use warnings;
+use File::Basename;
+use Pod::Usage;
+use Getopt::Long;
+
+# C locale for the mpstat output parsed in usage(); LC_ALL overrides LANG.
+@ENV{qw(LANG LC_ALL)} = ("C", "C");
+
+my $ME      = basename $0;          # program name shown by --version
+my $VERSION = "0.1";
+my $mpstat  = "/usr/bin/mpstat";    # absolute path of the mpstat binary
+
+sub version($$);
+sub usage($$);
+my $opt = {    # option defaults, overwritten in place by GetOptions()
+    processor  => 'ALL',
+    averageall => 0,
+    warning    => 10,
+    critical   => 80,
+    interval   => 1,
+    ok         => 0
+};
+
+my %ERRORS = (    # plugin exit codes, keyed by status name
+    OK       => 0,
+    WARNING  => 1,
+    CRITICAL => 2,
+    UNKNOWN  => 3
+);
+
+# Entry point: parse options (bad usage is UNKNOWN), check mpstat, run check.
+MAIN: {
+    Getopt::Long::Configure('bundling');
+    GetOptions(
+        "p|processor=s" => \$opt->{processor},
+        "a|averageall"  => \$opt->{averageall},
+        "w|warning=i"   => \$opt->{warning},
+        "c|critical=i"  => \$opt->{critical},
+        "i|interval=i"  => \$opt->{interval},
+        "o|ok"          => \$opt->{ok},
+        "h|help" => sub { pod2usage(-verbose => 1, -exitval => $ERRORS{OK}) },
+        "m|man" => sub { pod2usage(-verbose => 2, -exitval => $ERRORS{OK}) },
+        "V|version" => sub { version($ME, $VERSION); exit $ERRORS{OK}; }
+    ) or pod2usage(-verbose => 1, -exitval => $ERRORS{UNKNOWN});  # not CRITICAL
+    if (not -x $mpstat) {
+        my $why = (-e $mpstat) ? "not executable" : $!;  # $! fits a missing file
+        print "CPU UNKNOWN - $mpstat ($why)\n";
+        exit $ERRORS{UNKNOWN};
+    }
+    usage($opt->{processor}, $opt->{interval});
+}
+
+# Print the program name, its version and the licence notice to STDOUT.
+sub version($$) {
+    my ($name, $release) = @_;
+
+    print join "\n",
+      "$name version $release",
+      "",
+      "Copyright (C) 2011 by Christian Arnold and Schlittermann internet & unix support.",
+      "$name comes with ABSOLUTELY NO WARRANTY.  This is free software,",
+      "and you are welcome to redistribute it under certain conditions.",
+      "See the GNU General Public Licence for details.",
+      "";
+}
+
+# Run mpstat once and report the CPU usage as a Nagios check result.
+#
+#   $processor  value passed to mpstat -P (a cpu list or ALL)
+#   $interval   length of the sampling period in seconds
+#
+# Prints one status line with performance data and exits with the matching
+# %ERRORS code; reads the file-level $opt, %ERRORS and $mpstat.
+sub usage($$) {
+    my ($processor, $interval) = @_;
+
+    # List-form pipe open: $processor comes from the command line and must
+    # never be interpreted by a shell.
+    open(my $pipe, '-|', $mpstat, '-P', $processor, $interval, 1) or do {
+        print "CPU UNKNOWN - cannot run $mpstat ($!)\n";
+        exit $ERRORS{UNKNOWN};
+    };
+
+    my @fields;    # column names taken from mpstat's header row
+    my @rows;      # one hash ref (column name => value) per data row
+    while (my $line = <$pipe>) {
+
+        # Only lines starting with HH:MM:SS are used; the second column is
+        # either the literal "CPU" (header row) or a cpu id / "all".
+        my ($cpu) = $line =~ /^\d{2}:\d{2}:\d{2}\s+(\S+)/ or next;
+        my @cols = split ' ', $line;
+
+        if ($cpu eq "CPU") {
+            @fields = map { /^%?(.*)/ } @cols;    # "%idle" -> "idle"
+            next;
+        }
+
+        my %stat;
+        @stat{@fields} = @cols;
+
+        # Ignore rows that cannot be mapped to an idle value (no header yet).
+        push @rows, \%stat if defined $stat{idle};
+    }
+    my $ran_ok = close $pipe;    # false when mpstat exited non-zero
+
+    # No data must not be reported as OK: the result is simply unknown.
+    if (not $ran_ok or not @rows) {
+        print "CPU UNKNOWN - no usable output from $mpstat\n";
+        exit $ERRORS{UNKNOWN};
+    }
+
+    # With --averageall only mpstat's "all" row is reported, if there is one;
+    # otherwise every row is reported, as without the option.
+    my ($all) = grep { $_->{CPU} eq "all" } @rows;
+    my $average = $opt->{averageall} && defined $all;
+    @rows = ($all) if $average;
+
+    my @status_information;
+    my @performance_data;
+    my $rc = "OK";
+
+    foreach my $row (@rows) {
+
+        # Usage is 100 - idle, truncated to an integer before comparing.
+        my $used = sprintf("%d", 100 - $row->{idle});
+        my $label = $row->{CPU} eq "all" ? "all" : "cpu$row->{CPU}";
+
+        push @status_information,
+          $average ? "all cpus: $used%" : "$label: $used%";
+        push @performance_data,
+          "$label=$used%;$opt->{warning};$opt->{critical};0;0";
+
+        # Keep the worst state seen over all reported rows.
+        if ($used >= $opt->{critical}) {
+            $rc = "CRITICAL";
+        }
+        elsif ($used >= $opt->{warning} and $rc ne "CRITICAL") {
+            $rc = "WARNING";
+        }
+    }
+
+    # --ok forces the OK state whatever the measured values are.
+    $rc = "OK" if $opt->{ok};
+
+    # The three existing wordings of the message are kept for compatibility.
+    my $what =
+        $average   ? "average usage over"
+      : $opt->{ok} ? "average usage"
+      :              "average cpu usage";
+
+    print "CPU $rc - $what @status_information"
+      . " for the last ${interval}s | "
+      . "@performance_data" . "\n";
+    exit $ERRORS{$rc};
+}
+
+__END__
+
+=head1 NAME
+
+check_cpu - nagios plugin to check cpu usage
+
+=head1 SYNOPSIS
+
+B<check_cpu> [OPTION...]
+
+=head1 OPTIONS
+
+=over
+
+=item B<-p>, B<--processor> { cpu [,...] | ALL }
+
+Indicate the processor number for which usage is to be reported.  I<cpu> is the processor number.
+Note that processor 0 is the first processor.  The B<ALL> keyword indicates that usage is to be reported for all processors.
+The default value is B<ALL>.
+
+=item B<-a>, B<--averageall>
+
+Only output the average cpu usage over all processors.
+
+=item B<-w>, B<--warning> INTEGER
+
+Exit with I<WARNING> status if cpu usage is greater than or equal to INTEGER percent and less than the B<--critical> value.
+The default value is B<10> percent.
+
+=item B<-c>, B<--critical> INTEGER
+
+Exit with I<CRITICAL> status if cpu usage is greater than or equal to INTEGER percent.  The default value is B<80> percent.
+
+=item B<-i>, B<--interval> INTEGER
+
+The I<interval> parameter specifies the time period in B<seconds> over which cpu usage is averaged.  The default value is B<1> second.
+
+=item B<-o>, B<--ok>
+
+Always exit with I<OK> status, whatever the measured cpu usage is.
+
+=item B<-h>, B<--help>
+
+Print detailed help screen.
+
+=item B<-m>, B<--man>
+
+Print manual page.
+
+=item B<-V>, B<--version>
+
+Print version information.
+
+=back
+
+=head1 DESCRIPTION
+
+Nagios plugin for checking cpu usage. This plugin requires the B<sysstat> package on your system.
+
+=head1 EXAMPLES
+
+=over
+
+=item B<check_cpu>
+
+Output average cpu usage for each processor for the last second.
+
+=item B<check_cpu -i 10>
+
+Output average cpu usage for each processor for the last 10 seconds.
+
+=item B<check_cpu -a -i 10>
+
+Output only the average cpu usage over all processors for the last 10 seconds.
+
+=item B<check_cpu -o -a -i 10>
+
+Output only the average cpu usage over all processors for the last 10 seconds and the exit value is always I<OK>.
+
+=back
+
+=head1 EXIT VALUES
+
+=over
+
+=item B<0>
+
+status I<OK>
+
+=item B<1>
+
+status I<WARNING>
+
+=item B<2>
+
+status I<CRITICAL>
+
+=item B<3>
+
+status I<UNKNOWN>
+
+=back
+
+=head1 VERSION
+
+This man page is current for version 0.1 of check_cpu.
+
+=head1 AUTHOR
+
+Written by Christian Arnold E<lt>arnold@schlittermann.deE<gt>
+
+=head1 COPYRIGHT
+
+Copyright (C) 2011 by Christian Arnold and Schlittermann internet & unix support.
+This is free software, and you are welcome to redistribute it under certain conditions.
+See the GNU General Public Licence for details.
+
+=cut