Agenda: exercises-nagios.txt

File exercises-nagios.txt, 31.1 KB (added by b.candler, 7 years ago)

Text version of nagios exercises

Line 
1
2Nagios Installation and Configuration
3
4Notes:
5------
6* Commands preceded with "$" imply that you should execute the command as
7  a general user - not as root.
8* Commands preceded with "#" imply that you should be working as root.
9* Commands with more specific command lines (e.g. "RTR-GW>" or "mysql>")
10  imply that you are executing commands on remote equipment, or within
11  another program.
12
13Exercises
14---------
15
16Exercises Part I
17----------------
18
190. Log in to your virtual machine as the sysadm user.
20
211. Install Nagios Version 3
22---------------------------
23
24        $ sudo apt-get install nagios3 nagios3-doc
25
26You will be prompted for the nagiosadmin password. Give it the normal workshop
27password.
28
29
302. See Initial Nagios Configuration
31------------------------------------
32
33Open a browser, and go to your machine like this:
34
35        http://pcN.ws.nsrc.org/nagios3/
36
37At the login prompt, login as:
38
39        user: nagiosadmin
40        pass: <CLASS PASSWORD>
41
42Browse to the "Host Detail" page to see what's already configured.
43
44
453. Remove the File host-gateway_nagios3.cfg
46-------------------------------------------
47
48        $ sudo bash
49        # cd /etc/nagios3/conf.d
50        # rm host-gateway_nagios3.cfg
51
52
534. Update the File hostgroups_nagios2.cfg
54-----------------------------------------
55
56        # editor hostgroups_nagios2.cfg
57
58Go to the bottom of the file and find the entry:
59
60
61define hostgroup {
62        hostgroup_name  ping-servers
63                alias           Pingable servers
64                members         gateway
65        }
66
67
68Change the members line so that this looks like:
69
70
71define hostgroup {
72        hostgroup_name  ping-servers
73                alias           Pingable servers
74                members         rtrX
75        }
76
77Where "rtrX" is the router for your group. Now save and exit from the file.
78
79
805. Add Routers, PCs and Switches
81--------------------------------
82
83We will create three files, routers.cfg, switches.cfg and pcs.cfg and make
84entries for the hardware in our classroom.
85
865a. Creating the switches.cfg file
87----------------------------------
88
89        # editor switches.cfg
90
91In this file add the following entry:
92
93define host {
94    use         generic-host
95    host_name   sw
96    alias       Backbone Switch
97    address     10.10.0.253
98}
99
100Save the file and exit.
101
1025b. Creating the routers.cfg file
103---------------------------------
104
105We have 10 total routers. These are rtr1-rtr9 and gw-rtr. We will define entries
106for each of these.
107
108        # editor routers.cfg
109
110
111define host {
112    use         generic-host
113    host_name   gw-rtr
114    alias       Classroom Gateway Router
115    address     10.10.0.254
116}
117
118define host {
119    use         generic-host
120    host_name   rtr1
121    alias       Group 1 Gateway Router
122    address     10.10.1.254
123}
124
125define host {
126    use         generic-host
127    host_name   rtr2
128    alias       Group 2 Gateway Router
129    address     10.10.2.254
130}
131
132define host {
133    use         generic-host
134    host_name   rtr3
135    alias       Group 3 Gateway Router
136    address     10.10.3.254
137}
138
139define host {
140    use         generic-host
141    host_name   rtr4
142    alias       Group 4 Gateway Router
143    address     10.10.4.254
144}
145
146define host {
147    use         generic-host
148    host_name   rtr5
149    alias       Group 5 Gateway Router
150    address     10.10.5.254
151}
152
153define host {
154    use         generic-host
155    host_name   rtr6
156    alias       Group 6 Gateway Router
157    address     10.10.6.254
158}
159
160define host {
161    use         generic-host
162    host_name   rtr7
163    alias       Group 7 Gateway Router
164    address     10.10.7.254
165}
166
167define host {
168    use         generic-host
169    host_name   rtr8
170    alias       Group 8 Gateway Router
171    address     10.10.8.254
172}
173
174define host {
175    use         generic-host
176    host_name   rtr9
177    alias       Group 9 Gateway Router
178    address     10.10.9.254
179}
180
181define host {
182    use         generic-host
183    host_name   ap1
184    alias       Wireless Access Point 1
185    address     10.10.0.251
186}
187
188define host {
189    use         generic-host
190    host_name   ap2
191    alias       Wireless Access Point 2
192    address     10.10.0.252
193}
194
195
196Now save and exit from the file.
197
198
1995c. Creating the pcs.cfg File
200-----------------------------
201
202Now we will create entries for all the Virtual Machines in our classroom. Below
203we give you the first few entries. You should complete the file with as many PCs
204as you wish to add. We recommend that, at least, you add the 4 PCs that are members
205of your group as well as an entry for the classroom NOC.
206
207        # editor pcs.cfg
208
209
210define host {
211    use         generic-host
212    host_name   noc
213    alias       Workshop NOC machine
214    address     10.10.0.250
215}
216
217#
218# Group 1
219#
220
221define host {
222    use         generic-host
223    host_name   pc1
224    alias       pc1
225    address     10.10.1.1
226}
227
228define host {
229    use         generic-host
230    host_name   pc2
231    alias       pc2
232    address     10.10.1.2
233}
234
235define host {
236    use         generic-host
237    host_name   pc3
238    alias       pc3
239    address     10.10.1.3
240}
241
242define host {
243    use         generic-host
244    host_name   pc4
245    alias       pc4
246    address     10.10.1.4
247}
248
249
250You can save and exit from the file now, or you can continue to add more PC entries.
251If you have not added PCs for your group be sure to do that before you exit from the
252file.
253
254
255
256STEPS 6a - 6c SHOULD BE REPEATED WHENEVER YOU UPDATE THE CONFIGURATION!
257=======================================================================
258   
2596a. Verify that your configuration files are OK
260-----------------------------------------------
261
262        # nagios3 -v /etc/nagios3/nagios.cfg
263
264
265    ... You should get some warnings like :
266
267Checking services...
268        Checked 7 services.
269Checking hosts...
270Warning: Host 'gw-rtr' has no services associated with it!
271Warning: Host 'rtr1' has no services associated with it!
272Warning: Host 'rtr2' has no services associated with it!
273
274etc....
275...
276Total Warnings: N
277Total Errors:   0
278
279Things look okay - No serious problems were detected during the check.
280Nagios is saying that it's unusual to monitor a device just for its
281existence on the network, without also monitoring some service.
282
283
2846b. Reload/Restart Nagios
285-------------------------
286
287        # service nagios3 restart
288
289It is not always 100% reliable to use the "restart" option due to a bug in the Nagios init
290script. To be sure you may want to get used to doing:
291
292        # service nagios3 stop
293        # service nagios3 start
294
2956c. Verify via the Web Interface
296--------------------------------
297
298Go to the web interface (http://pcN.ws.nsrc.org/nagios3) and check that the hosts
299you just added are now visible in the interface. Click on the "Host Detail" item
300on the left of the Nagios screen to see this. You may see it in "PENDING"
301status until the check is carried out.
302
303
304HINT: You will be doing this a lot. If you do it all on one line, like this,
305then you can hit cursor-up and rerun all in one go:
306
307        # nagios3 -v /etc/nagios3/nagios.cfg && /etc/init.d/nagios3 restart
308
309The '&&' ensures that the restart only happens if the config is valid.
310
311
3127. View Host Detail and Status Map
313
314Go to http://pcN.ws.nsrc.org/nagios3
315
316Click on the "Host Detail" item on the left. Are all the hosts you have defined
317listed? Are they up?
318
319Click on the "Status Map" item on the left. You should see all your hosts with the
320Nagios process in the middle.
321
322
323
324PART II
325Configure Service check for the classroom NOC
326-----------------------------------------------------------------------------
327
3280. Configuring
329
330Now that we have our hardware configured we can start telling Nagios what services to monitor
331on the configured hardware, how to group the hardware in interesting ways, how to group
332services, etc.
333
3341. Associate a service check for our classroom NOC
335
336    # editor hostgroups_nagios2.cfg
337
338    - Find the hostgroup named "ssh-servers". In the members section of the definition
339      change the line:
340
341members                 localhost
342
343    to
344
345members                 localhost,noc
346
347Exit and save the file.
348
349Verify that your changes are OK:
350
351        # nagios3 -v /etc/nagios3/nagios.cfg
352       
353Restart Nagios to see the new service association with your host:
354
355        # /etc/init.d/nagios3 restart
356
357Click on the "Service Detail" link in the Nagios web interface to see your new entry.
358
359
360
361PART III
362Defining Services for all PCs
363-----------------------------------------------------------------------------
364
3650. For services, the default normal_check_interval is 5 (minutes) in
366   generic-service_nagios2.cfg. You may wish to change this to 1 to speed up
367   how quickly service issues are detected, at least in the workshop.
368
3691. Determine what services to define for what devices
370
371   - This is core to how you use Nagios and network monitoring tools in
372     general. So far we are simply using ping to verify that physical hosts
373     are up on our network and we have started monitoring a single service on
374     a single host (your PC). The next step is to decide what services you wish
375     to monitor for each host in the classroom.
376
377   - In this particular class we have:
378
379     routers:  running ssh and snmp
380     switches: running telnet and possibly ssh as well as snmp
381     pcs:      All PCs are running ssh and http and should be running snmp
382               The NOC is currently running an snmp daemon
383             
384     So, let's configure Nagios to check for these services for these
385     devices.
386
3872.) Verify that SSH is running on the routers and workshop PC images
388
389   - In the file services_nagios2.cfg there is already an entry for the SSH
390     service check, so you do not need to create this step. Instead, you
391     simply need to re-define the "ssh-servers" entry in the file
392     /etc/nagios3/conf.d/hostgroups_nagios2.cfg. The initial entry in the file
393     looked like:
394
395# A list of your ssh-accessible servers
396define hostgroup {
397        hostgroup_name  ssh-servers
398                alias           SSH servers
399                members         localhost
400        }
401
402     What do you think you should change? Correct, the "members" line. You should
403     add in entries for all the classroom pcs, routers and the switches that run ssh.
404     With this information and the network diagram you should be able to complete this entry.
405     
406     The entry will look something like this:
407
408define hostgroup {
409        hostgroup_name  ssh-servers
410                alias           SSH servers
411                members         localhost,pc1,pc2,pc3,pc4,...,pc32,ap1,ap2,s1,s2,noc,rtr1,rtr2,...,rtr9,gw-rtr
412        }
413
414         Note: leave in "localhost" - This is your PC and represents Nagios' network point of
415         view. So, for instance, if you are on "pc3" you would not include "pc3" in the list
416         of all the classroom pcs as it is represented by the "localhost" entry.
417         
418         The "members" entry will be a long line and will likely wrap on the screen.
419
420         Remember to include all your PCs and all your routers that you have defined. Do not
421         include any entries if they are not already defined in pcs.cfg, switches.cfg or
422         routers.cfg.
423
424    - Once you are done, run the pre-flight check:
425
426    # nagios3 -v /etc/nagios3/nagios.cfg
427
428    If everything looks good, then restart Nagios
429
430    # /etc/init.d/nagios3 stop
431    # /etc/init.d/nagios3 start
432
433    and view your changes in the Nagios web interface.
434
435To continue with hostgroups you can add additional groups for later use, such as all our virtual
436servers. Go ahead and edit the file hostgroups_nagios2.cfg again:
437
438     # editor hostgroups_nagios2.cfg
439
440and add the following to the end of the file:
441
442# A list of our virtual routers
443define hostgroup {
444        hostgroup_name  cisco7200
445                alias           Cisco 7200 Routers
446                members         rtr1,rtr2,rtr3,rtr4,rtr5,rtr6,rtr7,rtr8,rtr9
447        }
448
449Save and exit from the file. Verify that everything is OK:
450
451    # nagios3 -v /etc/nagios3/nagios.cfg
452
453    If everything looks good, then restart Nagios
454
455    # service nagios3 stop
456    # service nagios3 start
457
4583.) Check that http is running on all the classroom PCs.
459
460    - This is almost identical to the previous exercise. Just make the change to the
461      HTTP service adding in each PC (no routers or switches). Remember, you don't need
462      to add your machine as it is already defined as "localhost".     
463
464
465
466PART IV
467Adding Parent Relationships
468-----------------------------------------------------------------------------
469
470Each item is a child of either a switch or a router in our classroom, EXCEPT for
471your gateway router (rtrX) and the other members of your group. We are now going
472to add a "parents" statement for each device we have configured.
473
474If you are unsure of the parent relationships you can look at our classroom Network
475Diagram. Remember, the parent relationships are from the point of view of your Nagios
476instance running on your pc.
477
4781. Adding Parents to switches.cfg
479---------------------------------
480
481        # cd /etc/nagios3/conf.d
482        # editor switches.cfg
483
484Update the entry:
485
486
487define host {
488    use         generic-host
489    host_name   sw
490    alias       Backbone Switch
491    address     10.10.0.253
492}
493
494
495to be
496
497
498define host {
499    use         generic-host
500    host_name   sw
501    alias       Backbone Switch
502    address     10.10.0.253
503    parents     rtrX
504}
505
506
507Where "rtrX" is the gateway router for your group. I.E., for group 1 you
508would use "rtr1", for group 2, "rtr2" and so forth.
509
510Save and exit from the file.
511
512
5132. Adding Parents to routers.cfg
514--------------------------------
515
516        # editor routers.cfg
517
518For each entry we will add a "parents" line. So, for the gw-rtr definition at
519the top of the file this should now look like:
520
521
522define host {
523    use         generic-host
524    host_name   gw-rtr
525    alias       Classroom Gateway Router
526    address     10.10.0.254
527    parents     sw
528}
529
530For all the remaining rtrX entries you should, also, add a line that says:
531
532    parents     sw
533
534EXCEPT For the rtrX entry for your group. There should be NO PARENTS entry.
535
536So, if you are in group 2, then the entries for groups 1, 2 and 3 would look like:
537
538
539define host {
540    use         generic-host
541    host_name   rtr1
542    alias       Group 1 Router
543    address     10.10.1.254
544    parents     sw
545}
546
547define host {
548    use         generic-host
549    host_name   rtr2
550    alias       Group 2 Router
551    address     10.10.2.254
552}
553
554define host {
555    use         generic-host
556    host_name   rtr3
557    alias       Group 3 Router
558    address     10.10.3.254
559    parents     sw
560}
561
562
563Update the rest of the file correctly and then save and exit from the file.
564
565
5663. Adding Parents to pcs.cfg
567-----------------------------
568
569For all the PC entries you should add a "parents" line that has the router
570for that PC's group. For the noc the parent is the core switch or "sw"
571
572#
573# Classroom NOC
574#
575
576define host {
577    use         generic-host
578    host_name   noc
579    alias       Workshop NOC machine
580    address     10.10.0.250
581    parents     sw
582}
583
584
585For PCs in Group 1 entries look like:
586
587
588#
589# Group 1
590#
591
592define host {
593    use         generic-host
594    host_name   pc1
595    alias       pc1
596    address     10.10.1.1
597    parents     rtr1
598}
599
600define host {
601    use         generic-host
602    host_name   pc2
603    alias       pc2
604    address     10.10.1.2
605    parents     rtr1
606}
607
608
609etc

610
611Do this for all the PCs in the remaining groups.
612
613BUT, FOR THE 4 ENTRIES FOR THE PCS IN YOUR GROUP DO NOT ADD ANY PARENTS
614STATEMENT!
615
616Save and exit from the file.
617
618
6194. Restart Nagios and See the Updated Status Map
620------------------------------------------------
621
622        # service nagios3 restart
623
624If you have errors, fix these and try restarting again.
625
626Open a web browser to http://pcN.ws.nsrc.org/nagios3 and click on the "Status Map"
627link on the left. Your map should now look quite different. You should see a map that
628represents the Nagios world point of view from your machine.
629
630
631
632PART V
633Create More Host Groups
634-----------------------------------------------------------------------------
635
6360. In the web view, look at the pages "Hostgroup Overview", "Hostgroup
637   Summary", "Hostgroup Grid". This gives a convenient way to group together
638   hosts which are related (e.g. in the same site, serving the same purpose).
639
6401. Update /etc/nagios3/conf.d/hostgroups_nagios2.cfg
641
642    - For the following exercises it will be very useful if we have created
643      or updated the following hostgroups:
644
645      debian-servers
646      routers
647      switches
648 
649      If you edit the file /etc/nagios3/conf.d/hostgroups_nagios2.cfg you
650      will see an entry for debian-servers that just contains localhost.
651      Update this entry to include all the classroom PCs, including the
652      noc (this assumes that you created a "noc" entry in your pcs.cfg
653      file). Remember to skip your PC entry as it is represented by the
654      localhost entry.
655
656    # editor /etc/nagios3/conf.d/hostgroups_nagios2.cfg
657
658     Update the entry that says:
659
660
661# A list of your Debian GNU/Linux servers
662define hostgroup {
663        hostgroup_name  debian-servers
664                alias           Debian GNU/Linux Servers
665                members         localhost
666        }
667     
668      So that the "members" parameter contains something like this. Use your
669      classroom network diagram to confirm the exact number of machines and names
670      in your workshop.
671
672                members         localhost,pc1,pc2,pc3,pc4,pc5,pc6,pc7,pc8,pc9,
673                                pc10,pc11,pc12,pc13,pc14,pc15,pc16,pc17,pc18,
674                                pc19,pc20,pc21,pc22,pc23,pc24,pc25,pc26,pc27,
675                                pc28,pc29,pc30,pc31,pc32,pc33,pc34,pc35,pc36
676
677        Be sure that the line wraps and is not on separate lines. Otherwise
678        you will get an error when you go to restart Nagios. Remember that
679        your own PC is "localhost".
680
681      - Once you have done this, add in two more host groups, one for routers and
682        one for switches. Call these entries "routers" and "switches".
683
684      - When you are done be sure to verify your work and restart Nagios.
685
686      - Remember to skip your pc entry as it is represented by the localhost entry.
687 
6882. Go back to the web interface and look at your new hostgroups
689
690
691PART VI
692Extended Host Information ("making your graphs pretty")
693-----------------------------------------------------------------------------
694
6951. Update extinfo_nagios2.cfg
696
697    - If you would like to use appropriate icons for your defined hosts in
698      Nagios this is where you do this. We have the three types of devices:
699
700      Cisco routers
701      Cisco switches
702      Ubuntu servers
703
704      There is a fairly large repository of icon images available for you to
705      use located here:
706
707      /usr/share/nagios/htdocs/images/logos/
708
709      these were installed by default as dependent packages of the nagios3
710      package in Ubuntu. In some cases you can find model-specific icons for
711      your hardware, but to make things simpler we will use the following
712      icons for our hardware:
713
714      /usr/share/nagios/htdocs/images/logos/base/debian.*
715      /usr/share/nagios/htdocs/images/logos/cook/router.*
716      /usr/share/nagios/htdocs/images/logos/cook/switch.*
717
718    - The next step is to edit the file /etc/nagios3/conf.d/extinfo_nagios2.cfg
719      and tell nagios what image you would like to use to represent your devices.
720
721    # editor /etc/nagios3/conf.d/extinfo_nagios2.cfg
722
723      Here is what an entry for your routers looks like (there is already an entry
724      for debian-servers that will work as is). Note that the router model (3600)
725      is not all that important. The image used represents a router in general.
726
727define hostextinfo {
728        hostgroup_name   routers
729        icon_image       cook/router.png
730        icon_image_alt   Cisco Routers (3600)
731        vrml_image       router.png
732        statusmap_image  cook/router.gd2
733}
734
735      Now add an entry for your switches. Once you are done check your
736      work and restart Nagios. Take a look at the Status Map in the web interface.
737      It should be much nicer, with real icons instead of question marks.
738
739
740PART VII
741Create Service Groups
742-----------------------------------------------------------------------------
743
7441. Create service groups for ssh and http for each set of pcs.
745
746   - The idea here is to create three service groups. Each service group will
747     be for a quarter of the classroom. We want to see these PCs grouped together
748     and include status of their ssh and http services. To do this edit
749     and create the file:
750
751   # editor /etc/nagios3/conf.d/servicegroups.cfg
752
753     Here is a sample of the service group for group 1:
754
755define servicegroup {
756        servicegroup_name       group1-servers
757        alias                   group 1 servers
758        members                 pc1,SSH,pc1,HTTP,pc2,SSH,pc2,HTTP,pc3,SSH,pc3,HTTP,pc4,SSH,pc4,HTTP
759        }
760
761        - Note that the members line should wrap and not be on two lines.
762       
763        - Note that "SSH" and "HTTP" need to be uppercase as this is how the service_description is
764          written in the file /etc/nagios3/conf.d/services_nagios2.cfg
765         
766        - You should create an entry for other groups of servers too
767
768    - Save your changes, verify your work and restart Nagios. Now if you click on
769      the Servicegroup menu items in the Nagios web interface you should see
770      this information grouped together.
771
772
773
774PART VIII
775Configure Guest Access to the Nagios Web Interface
776-----------------------------------------------------------------------------
777
7781. Edit /etc/nagios3/cgi.cfg to give read-only guest user access to the Nagios
779   web interface.
780
781    - By default Nagios is configured to give full r/w access via the Nagios
782      web interface to the user nagiosadmin. You can change the name of this
783      user, add other users, change how you authenticate users, what users
784      have access to what resources and more via the cgi.cfg file.
785
786    - First, let's create a "guest" user and password in the htpasswd.users
787      file.
788     
789    # htpasswd /etc/nagios3/htpasswd.users guest
790
791      You can use any password you want (or none). A password of "guest" is
792      not a bad choice.
793
794    - Next, edit the file /etc/nagios3/cgi.cfg and look for what type of access
795      has been given to the nagiosadmin user. By default you will see the following
796      directives (note, there are comments between each directive):
797
798      authorized_for_system_information=nagiosadmin
799      authorized_for_configuration_information=nagiosadmin
800      authorized_for_system_commands=nagiosadmin
801      authorized_for_all_services=nagiosadmin
802      authorized_for_all_hosts=nagiosadmin
803      authorized_for_all_service_commands=nagiosadmin
804      authorized_for_all_host_commands=nagiosadmin
805
806      Now let's tell Nagios to allow the "guest" user some access to
807      information via the web interface. You can choose whatever you would
808      like, but what is pretty typical is this:
809
810      authorized_for_system_information=nagiosadmin,guest
811      authorized_for_configuration_information=nagiosadmin,guest
812      authorized_for_system_commands=nagiosadmin
813      authorized_for_all_services=nagiosadmin,guest
814      authorized_for_all_hosts=nagiosadmin,guest
815      authorized_for_all_service_commands=nagiosadmin
816      authorized_for_all_host_commands=nagiosadmin
817
818    - Once you make the changes, save the file cgi.cfg, verify your
819      work and restart Nagios.
820
821    - To see if you can log in as the "guest" user you may need to clear
822      the cookies in your web browser. You will not notice any difference
823      in the web interface. The difference is that a number of items that
824      are available via the web interface (forcing a service/host check,
825      scheduling checks, comments, etc.) will not work for the guest
826      user.
827
828
829
830PART IX
831Optional Exercises
832-------------------------------------------------------------------------------
833
8341. Check that nagios is Running
835-------------------------------
836
837As opposed to just checking that a web server is
838running on the classroom PCs, you could also check that the nagios3
839service is available, by requesting the /nagios3/ path. This means
840passing extra options to the check_http plugin.
841
842For a description of the available options, type this:
843
844      # /usr/lib/nagios/plugins/check_http
845      # /usr/lib/nagios/plugins/check_http --help
846
847and of course you can browse the online nagios documentation or google
848for information on check_http. You can even run the plugin by hand to
849perform a one-shot service check:
850
851     # /usr/lib/nagios/plugins/check_http -H localhost -u /nagios3/
852
853So the goal is to configure nagios to call check_http in this way.
854
855define command{
856        command_name    check_http_arg
857        command_line    /usr/lib/nagios/plugins/check_http -H '$HOSTADDRESS$' $ARG1$
858        }
859
860define service {
861        hostgroup_name                  nagios-servers
862        service_description             NAGIOS
863        check_command                   check_http_arg!-u /nagios3/
864        use                             generic-service
865}
866
867     and of course you'll need to create a hostgroup called nagios-servers to
868     link to this service check.
869
870     Once you have done this, check that Nagios warns you about failing
871     authentication (because it's trying to fetch the page without providing
872     the username/password). There's an extra parameter you can pass to
873     check_http_arg to provide that info, see if you can find it.
874
875      WARNING: in the tradition of "Debian Knows Best", their definition of the
876      check_http command in /etc/nagios-plugins/config/http.cfg
877      is *not* the same as that recommended in the nagios3 documentation.
878      It is missing $ARG1$, so any parameters to pass to check_http are
879      ignored. So you might think you are monitoring /nagios3/ but actually
880      you are monitoring root!
881
882     This is why we had to make a new command definition "check_http_arg".
883     You could make a more specific one like "check_nagios", or you could
884     modify the Ubuntu check_http definition to fit the standard usage.
885
886
887
8882. Check that SNMP is running on the classroom NOC
889----------------------------------------------------
890
891    - First you will need to add in the appropriate service check for SNMP in the file
892      /etc/nagios3/conf.d/services_nagios2.cfg. This is where Nagios is impressive. There
893      are hundreds, if not thousands, of service checks available via the various Nagios
894      sites on the web. You can see what plugins are installed by Ubuntu in the nagios3
895      package that we've installed by looking in the following directory:
896
897    # ls /usr/lib/nagios/plugins
898
899      As you'll see there is already a check_snmp plugin available to us. If you are
900      interested in the options the plugin takes you can execute the plugin from the
901      command line by typing:
902
903    # /usr/lib/nagios/plugins/check_snmp
904    # /usr/lib/nagios/plugins/check_snmp --help
905
906      to see what options are available, etc. You can use the check_snmp plugin and
907      Nagios to create very complex or specific system checks.
908
909    - Now to see all the various service/host checks that have been created using the
910      check_snmp plugin you can look in /etc/nagios-plugins/config/snmp.cfg. You will
911      see that there are a lot of preconfigured checks using snmp, including:
912
913      snmp_load
914      snmp_cpustats
915      snmp_procname
916      snmp_disk
917      snmp_mem
918      snmp_swap
919      snmp_procs
920      snmp_users
921      snmp_mem2
922      snmp_swap2
923      snmp_mem3
924      snmp_swap3
925      snmp_disk2
926      snmp_tcpopen
927      snmp_tcpstats
928      snmp_bgpstate
929      check_netapp_uptime
930      check_netapp_cpuload
931      check_netapp_numdisks
932      check_compaq_thermalCondition
933     
934      And, even better, you can create additional service checks quite easily.
935      For the case of verifying that snmpd (the SNMP service on Linux) is running we
936      need to ask SNMP a question. If we don't get an answer, then Nagios can assume
937      that the SNMP service is down on that host. When you use service checks such as
938      check_http, check_ssh and check_telnet this is what they are doing as well.
939
940    - In our case, let's create a new service check and call it "check_system". This
941      service check will connect with the specified host, use the private community
942      string we have defined in class and ask a question of snmp on that host - in this
943      case we'll ask about the System Description, or the OID "sysDescr.0" -
944
945    - To do this start by editing the file /etc/nagios-plugins/config/snmp.cfg:
946
947    # joe /etc/nagios-plugins/config/snmp.cfg
948
949      At the top (or the bottom, your choice) add the following entry to the file:
950
951# 'check_system' command definition
952define command{
953       command_name    check_system
954       command_line    /usr/lib/nagios/plugins/check_snmp -H '$HOSTADDRESS$' -C
955'$ARG1$' -o sysDescr.0
956        }
957     
958      You may wish to copy and paste this vs. trying to type this out.
959
960          Note that "command_line" is a single line. If you copy and paste in joe the line
961          may not wrap properly and you may have to manually add the part:
962         
963                        '$ARG1$' -o sysDescr.0
964                       
965          to the end of the line.
966
967    - Now you need to edit the file /etc/nagios3/conf.d/services_nagios2.cfg and add
968      in this service check. We'll run this check against all our servers in the
969      classroom, or the hostgroup "debian-servers"
970
971    - Edit the file /etc/nagios3/conf.d/services_nagios2.cfg
972
973    # joe /etc/nagios3/conf.d/services_nagios2.cfg
974
975      At the bottom of the file add the following definition:
976
977# check that snmp is up on all servers
978define service {
979        hostgroup_name                  snmp-servers
980        service_description             SNMP
981        check_command                   check_system!xxxxxx
982        use                             generic-service
983        notification_interval           0 ; set > 0 if you want to be renotified
984}
985
986      The "xxxxxx" is the community string previously (or to be) defined in class.
987     
988      Note that we have included our private community string here vs. hard-coding
989      it in the snmp.cfg file earlier. You must change the "xxxxxx" to be the snmp
990      community string given in class or this check will not work.
991     
992    - Now we must create the "snmp-servers" group in our hostgroups_nagios2.cfg file.
993      Edit the file /etc/nagios3/conf.d/hostgroups_nagios2.cfg and go to the end of the
994      file. Add in the following hostgroup definition:
995     
996# A list of snmp-enabled devices on which we wish to run the snmp service check
997define hostgroup {
998           hostgroup_name       snmp-servers
999                   alias        snmp servers
1000                   members      noc
1001          }
1002         
1003        - Note that for "members" you could, also, add in the switches and routers for
1004          group 1 and 2. But, the particular item (MIB) we are checking for "sysDescr.0"
1005          may not be available on the switches and/or routers, so the check would then fail.
1006
1007    - Now verify that your changes are correct and restart Nagios.
1008
1009    - If you click on the Service Detail menu choice in web interface you should see
1010      the SNMP check appear for the noc host.
1011     
1012    - After we do the SNMP presentation and exercises in class, then you could come
1013      back to this exercise and add in all the classroom PCs to the members list in the
1014      hostgroups_nagios2.cfg file, snmp-servers hostgroup definition. Remember to list
1015      your PC as "localhost".
1016
1017