Friday, February 7, 2014

Nagios plugin examples

I have seen many Nagios beginners struggle to configure plugins to fit their requirements. Most of the time they just want to look at an existing configuration that they can use as a model for their own, but online resources rarely give live examples. I have picked a few basic plugins to demonstrate the configuration:
check_users : Plugin to monitor the number of users logged in to the system
check_load  : Plugin to monitor the run-time load of the system
check_disk  : Plugin to monitor the disk usage of the system
check_procs: Plugin to monitor the running processes in a system
check_log   : Plugin to monitor a particular log file in the system. If a particular error is logged, Nagios will alert the user.

Example Configurations :

1- Configuration example at Nagios server side
File : remotehost.cfg
# Define a service to check the load on the remote machine pkvm-tester1.
# check_nrpe asks the NRPE daemon on that host to run its "check_load" command.
# host_name matches the other services defined for this machine.
# NOTE(review): active_checks_enabled is 0, so Nagios will not schedule this
# check by itself; set it to 1 if regular polling is wanted - confirm intent.
define service{
        use                             generic-service
        host_name                       pkvm-tester1
        service_description             check Load
        is_volatile                     1
        check_command                   check_nrpe!check_load
        max_check_attempts              1
        active_checks_enabled           0
        passive_checks_enabled          1
        contact_groups                  admins
}

# Define a service to check the disk usage on the remote machine pkvm-tester1

# Disk check: check_nrpe runs the remote NRPE command "check_hda1".
# NOTE(review): active_checks_enabled is 0, so Nagios will not schedule this
# check by itself - confirm that is intended.
define service{
    # What is checked, and where
    use                       generic-service
    host_name                 pkvm-tester1
    service_description       check Disk
    check_command             check_nrpe!check_hda1

    # How results are treated
    is_volatile               1
    max_check_attempts        1
    active_checks_enabled     0
    passive_checks_enabled    1

    # Who is notified
    contact_groups            admins
}

# Define a service to check for zombie processes on the remote machine pkvm-tester1

# Zombie-process check: check_nrpe runs the remote NRPE command "check_zombie_procs".
# NOTE(review): active_checks_enabled is 0, so Nagios will not schedule this
# check by itself - confirm that is intended.
define service{
    # What is checked, and where
    use                       generic-service
    host_name                 pkvm-tester1
    service_description       check Procs Zombie
    check_command             check_nrpe!check_zombie_procs

    # How results are treated
    is_volatile               1
    max_check_attempts        1
    active_checks_enabled     0
    passive_checks_enabled    1

    # Who is notified
    contact_groups            admins
}

# Define a service to check the system log for SSL errors on the remote machine pkvm-tester1
# Log check: check_nrpe runs the remote NRPE command "check_sys_log".
# NOTE(review): active_checks_enabled is 0, so Nagios will not schedule this
# check by itself - confirm that is intended.
define service{
    # What is checked, and where
    use                       generic-service
    host_name                 pkvm-tester1
    service_description       check system log for SSL
    check_command             check_nrpe!check_sys_log

    # How results are treated
    is_volatile               1
    max_check_attempts        1
    active_checks_enabled     0
    passive_checks_enabled    1

    # Who is notified
    contact_groups            admins
}

# Define a second service to check the system log (SELinux) on the remote machine pkvm-tester1
# Second log check: check_nrpe runs the remote NRPE command "check_sys_log_2".
# NOTE(review): active_checks_enabled is 0, so Nagios will not schedule this
# check by itself - confirm that is intended.
define service{
    # What is checked, and where
    use                       generic-service
    host_name                 pkvm-tester1
    service_description       check system log for SElinux
    check_command             check_nrpe!check_sys_log_2

    # How results are treated
    is_volatile               1
    max_check_attempts        1
    active_checks_enabled     0
    passive_checks_enabled    1

    # Who is notified
    contact_groups            admins
}

# Define a service to check the total number of processes on the remote machine pkvm-tester1
# Total-process check: check_nrpe runs the remote NRPE command "check_total_procs".
# NOTE(review): active_checks_enabled is 0, so Nagios will not schedule this
# check by itself - confirm that is intended.
define service{
    # What is checked, and where
    use                       generic-service
    host_name                 pkvm-tester1
    service_description       check total Procs
    check_command             check_nrpe!check_total_procs

    # How results are treated
    is_volatile               1
    max_check_attempts        1
    active_checks_enabled     0
    passive_checks_enabled    1

    # Who is notified
    contact_groups            admins
}
File : commands.cfg
# Command definition for remote execution. $ARG1$ carries the name of the remote command, i.e. the part after '!' in each check_command defined above in remotehost.cfg.
# check_nrpe: contacts the NRPE daemon on the host's address (-H) and asks it
# to run the command whose name is passed in $ARG1$ (-c).
# $USER1$ is a user macro; presumably the plugin directory set in resource.cfg.
define command{
        command_name    check_nrpe
        command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}

2- Configuration example at Nagios client side.
File : nrpe.cfg
# Each command[<name>] entry maps a name requested by check_nrpe to the local
# plugin invocation; -w and -c are the WARNING and CRITICAL thresholds.

# Logged-in users: WARNING above 5, CRITICAL above 10.
command[check_users]=/usr/lib64/nagios/plugins/check_users -w 5 -c 10

# Load average: thresholds are listed for the 1-, 5- and 15-minute averages.
command[check_load]=/usr/lib64/nagios/plugins/check_load -w 15,10,5 -c 30,25,20

# Disk space on /dev/hda1: WARNING below 20% free, CRITICAL below 10% free.
command[check_hda1]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1

# Processes in state Z (zombie): WARNING above 5, CRITICAL above 10.
command[check_zombie_procs]=/usr/lib64/nagios/plugins/check_procs -w 5 -c 10 -s Z

# All processes: WARNING above 150, CRITICAL above 200.
command[check_total_procs]=/usr/lib64/nagios/plugins/check_procs -w 150 -c 200

# check_log compares the log file (-F) with a saved copy (-O) and alerts when
# newly added lines match the query (-q).
# Each check must use its own -O file: if two checks share one, whichever runs
# first refreshes the saved copy and the other never sees the new lines.
command[check_sys_log]=/usr/lib64/nagios/plugins/check_log -F '/var/log/messages' -O /tmp/oldlog_ssl -q 'Error - Could not complete SSL handshake'

command[check_sys_log_2]=/usr/lib64/nagios/plugins/check_log -F '/var/log/messages' -O /tmp/oldlog_nrpe -q 'NRPE Error:'

How it works:

When the Nagios process finds a check_command in remotehost.cfg, it looks in the configuration file commands.cfg for the definition of that command. Based on the command definition found (here check_nrpe), the argument is sent to the NRPE daemon running on the remote host.
The argument passed to check_nrpe (e.g. check_total_procs in check_nrpe!check_total_procs) must be defined on the remote host in its nrpe.cfg file. The NRPE daemon executes that command and returns the result, which is then displayed on the Nagios server. That's it!