Nagios監控生產環境redis集群服務實戰

jopen 10年前發布 | 35K 次閱讀 Nagios Redis NoSQL數據庫
前言:
    以前做了cacti上展示redis性能的報表圖,可以看到redis的性能變化趨勢,但是還缺少實時報警通知的功能,現在補上這一環節:在redis服務出現瓶頸或者異常時及時報警通知,方便dba第一時間處理維護。

1,下載redis監控插件

Redis已經在服務器上安裝好了,所以可以直接進行監控。redis集群安裝請參考:http://blog.itpub.net/26230597/viewspace-1145831/ 。監控插件下載地址為:http://download.csdn.net/detail/mchdba/8023351 ,有2個版本,一個是perl腳本寫成的,一個是php腳本寫成的,可以任意選擇一個,這裡選擇的是perl腳本。

   

2,賦予執行權限

將check_redis.php和check_redis.pl複製到/usr/lib/nagios/plugins/目錄,然後賦予執行權限:

[root@wgq_41 plugins]# cd /usr/lib/nagios/plugins/

[root@wgq_41 plugins]# chown -R nagios.nagios check_redis.*

[root@wgq_41 plugins]# chmod 750 check_redis.*

 

3,定義監控命令

[root@wgq objects]# vim /usr/local/nagios/etc/objects/commands.cfg

# add by tim on 20141010,for redis

# check redis

define command {

         command_name    check_redis

         command_line    /usr/lib/nagios/plugins/check_redis.pl -H $HOSTADDRESS$ -p $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -f

}


4,定義redis監控主機

[root@wgq etc]# vim /usr/local/nagios/etc/hosts.cfg

# No.018 cache-1 redis master server

define host{

        use                     linux-server

        host_name               cache-1

        alias                   cache-1

        address                 10.xxx.3.x0

        check_command           check-host-alive

        max_check_attempts              5

        check_period                    24x7

        contact_groups                  ops

        notification_interval           30

        notification_period             24x7

        notification_options            d,u,r

        }

# No.020 cache-3 redis slave server

define host{

        use                     linux-server

        host_name               cache-3

        alias                   cache-3

        address                 10.xx.3.x2

        check_command           check-host-alive

        max_check_attempts              5

        check_period                    24x7

        contact_groups                  ops

        notification_interval           30

        notification_period             24x7

        notification_options            d,u,r

        }


5,定義redis監控主機組

define hostgroup {

        hostgroup_name  Redis_Servers

        alias           Redisservices

        members         cache-1,cache-2

}

 

6,定義redis監控服務選項

[root@wgq objects]# vim /usr/local/nagios/etc/objects/services_redis.cfg

# Redis Master 監控選項

define service {

                   host_name               cache-1

                   servicegroups           Redisservices

        service_description     Redis Master Clients

        check_command           check_redis!6379!'connected_clients,blocked_clients,client_longest_output_list,client_biggest_input_buf'!200,50,~,~!600,150,~,~

        max_check_attempts      5

        normal_check_interval   3

        retry_check_interval    2

        check_period            24x7

        notification_interval   10

        notification_period     24x7

        notification_options    w,u,c,r

        contact_groups          ops

}

 

define service {

                   host_name               cache-1

                   servicegroups           Redisservices

        service_description     Redis Master Memory

        check_command           check_redis!6379!'used_memory_human,used_memory_peak_human'!~,~!~,~

        max_check_attempts      5

        normal_check_interval   3

        retry_check_interval    2

        check_period            24x7

        notification_interval   10

        notification_period     24x7

        notification_options    w,u,c,r

        contact_groups          ops

}

 

define service {

                   host_name               cache-1

                 servicegroups           Redisservices

        service_description     Redis Master CPU

        check_command           check_redis!6379!'used_cpu_sys,used_cpu_user,used_cpu_sys_children,used_cpu_user_children'!~,~,~,~!~,~,~,~ ; #未定義監控報警閥值

        max_check_attempts      5

        normal_check_interval   3

        retry_check_interval    2

        check_period            24x7

        notification_interval   10

        notification_period     24x7

        notification_options    w,u,c,r

        contact_groups          ops

}

 

# Redis Slave 監控選項

define service {

                   host_name               cache-3

                   servicegroups           Redisservices

        service_description     Redis Slave Clients

        check_command           check_redis!6379!'connected_clients,blocked_clients,client_longest_output_list,client_biggest_input_buf'!200,50,~,~!600,150,~,~

        max_check_attempts      5

        normal_check_interval   3

        retry_check_interval    2

        check_period            24x7

        notification_interval   10

        notification_period     24x7

        notification_options    w,u,c,r

        contact_groups          ops

}

 

define service {

                   host_name               cache-3

                   servicegroups           Redisservices

        service_description     Redis Slave Memory

        check_command           check_redis!6379!'used_memory_human,used_memory_peak_human'!~,~!~,~

        max_check_attempts      5

        normal_check_interval   3

        retry_check_interval    2

        check_period            24x7

        notification_interval   10

        notification_period     24x7

        notification_options    w,u,c,r

        contact_groups          ops

}

 

define service {

                   host_name               cache-3

                   servicegroups           Redisservices

        service_description     Redis Slave CPU

        check_command           check_redis!6379!'used_cpu_sys,used_cpu_user,used_cpu_sys_children,used_cpu_user_children'!~,~,~,~!~,~,~,~ ; #未定義監控報警閥值

        max_check_attempts      5

        normal_check_interval   3

        retry_check_interval    2

        check_period            24x7

        notification_interval   10

        notification_period     24x7

        notification_options    w,u,c,r

        contact_groups          ops

}

 

將配置文件的屬主改為nagios用戶,並賦予nagios用戶訪問權限

[root@wgq objects]# chown -R  nagios.nagios services_redis.cfg

[root@wgq objects]# chmod 777 services_redis.cfg

添加監控服務項到nagios.cfg

[root@wgq etc]# vim /usr/local/nagios/etc/nagios.cfg

cfg_file=/usr/local/nagios/etc/objects/services_redis.cfg

 

7,測試redis監控服務

執行命令/usr/lib/nagios/plugins/check_redis.pl  -H cache-1 -a 'connected_clients,blocked_clients' -w ~,~ -c ~,~ -m -M 4G -A -R -T 來測試下redis監控是否正常運行

[root@wgq plugins]# /usr/lib/nagios/plugins/check_redis.pl  -H 10.2xx.3.x0 -a 'connected_clients,blocked_clients' -w ~,~ -c ~,~ -m -M 4G -A -R -T

OK: REDIS 2.8.8 on 10.2xx.3.x0:6379 has 1 databases (db0) with 28497 keys, up 76 days 2 hours - response in 0.004s, hitrate is 12.83%, memory use is 194.14M (peak 205.14M, 6.49% of max, fragmentation 1.37%), connected_clients is 35, blocked_clients is 11 | redis_build_id=d322d411218ade61 total_connections_received=341191c used_memory_lua=33792 aof_rewrite_buffer_length=0 used_memory_rss=278749184B redis_git_dirty=0 loading=0 redis_mode=standalone latest_fork_usec=5588 repl_backlog_first_byte_offset=0 sync_partial_ok=0 master_repl_offset=0 uptime_in_days=76c aof_rewrite_scheduled=0 lru_clock=3649276 rdb_bgsave_in_progress=0 rejected_connections=0 repl_backlog_active=0 aof_delayed_fsync=1 sync_full=0 process_id=7776 used_memory_human=194.14M aof_current_rewrite_time_sec=-1 used_memory=203570960 aof_enabled=1 blocked_clients=11 aof_last_bgrewrite_status=ok aof_rewrite_in_progress=0 sync_partial_err=0 used_cpu_sys_children=2222.75 connected_slaves=0 repl_backlog_histlen=0 uptime_in_seconds=6576292c repl_backlog_size=1048576 os=Linux 2.6.32-358.el6.x86_64 x86_64 used_cpu_sys=32640.80 aof_pending_bio_fsync=0 connected_clients=35 rdb_last_bgsave_time_sec=1 used_memory_peak_human=205.14M run_id=d1fc098d26fa4bbcef3eabeec6d19a858f03dd00 rdb_last_bgsave_status=ok pubsub_patterns=8 client_biggest_input_buf=0 keyspace_hits=42175896c rdb_last_save_time=1412935342 rdb_changes_since_last_save=318 db0_keys=28497 db0_expires=7 db0_avg_ttl=34003 aof_pending_rewrite=0 aof_buffer_length=0 config_file=/usr/local/redis-2.8.8/etc/redis.conf pubsub_channels=0 used_cpu_user_children=21375.34 hz=10 aof_last_rewrite_time_sec=2 aof_last_write_status=ok aof_base_size=82883253 used_cpu_user=18460.42 keyspace_misses=286602797c tcp_port=6379 total_commands_processed=797581196c mem_fragmentation_ratio=1.37 aof_current_size=146485850 rdb_current_bgsave_time_sec=-1 client_longest_output_list=0 instantaneous_ops_per_sec=114 evicted_keys=0c used_memory_peak=215106272B expired_keys=58977c 
total_keys=28497 total_expires=7 response_time=0.003802s hitrate=12.8281% memory_utilization=6.49013519287109%

[root@wgq plugins]#

 

8,查看redis監控服務狀態

先重新加載nagios,使剛添加的redis監控配置生效

[root@wgq objects]# service nagios reload

Running configuration check...

Reloading nagios configuration...

done

[root@wgq objects]#

redis監控服務界面,如下圖所示:

26230597_1412953319empy.png

9,操作過程中的報錯處理過程

報錯:

[root@wgq_line_cache_3_41 plugins]# ./check_redis.pl --help

Can't locate Redis.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .) at ./check_redis.pl line 421.

BEGIN failed--compilation aborted at ./check_redis.pl line 421.

[root@wgq_line_cache_3_41 plugins]#

 

[root@wgq_line_cache_3_41 plugins]# perl -MCPAN -e shell

Terminal does not support AddHistory.

 

cpan shell -- CPAN exploration and modules installation (v1.9402)

Enter 'h' for help.

 

cpan[1]> install Redis

Can't locate Module/Build/Tiny.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .) at Build.PL line 2.

BEGIN failed--compilation aborted at Build.PL line 2.

Warning: No success on command[/usr/bin/perl Build.PL --installdirs site]

Warning (usually harmless): 'YAML' not installed, will not store persistent state

  DAMS/Redis-1.976.tar.gz

  /usr/bin/perl Build.PL --installdirs site -- NOT OK

Running Build test

  Make had some problems, won't test

Running Build install

  Make had some problems, won't install

Could not read '/root/.cpan/build/Redis-1.976-Zhz6xI/META.yml'. Falling back to other methods to determine prerequisites……

 

YAML是以數據為中心的標記語言,它使用ASCII字符(如連字符、問號、冒號、逗號等)來構造數據塊(標量值或哈希)。和XML一樣,YAML也是一種機器可識別的語言,並能和多種腳本語言相結合,其中一種便是Perl。從上面的警告信息可以看到YAML尚未安裝,需要先安裝YAML,執行如下命令:

cpan[2]>install YAML

……

Appending installation info to /usr/lib64/perl5/perllocal.pod

  INGY/YAML-1.12.tar.gz

  /usr/bin/make install  -- OK

CPAN: YAML loaded ok (v1.12)

PS:這裡可能會安裝失敗,失敗原因通常是網絡連接問題,多執行幾次install YAML即可成功。

再繼續執行install Redis,有如下提示信息

cpan[4]> install Redis

Running install for module 'Redis'

Running Build for D/DA/DAMS/Redis-1.976.tar.gz

  Has already been unwrapped into directory /root/.cpan/build/Redis-1.976-cUL4rt

  '/usr/bin/perl Build.PL --installdirs site' returned status 512, won't make

Running Build test

  Make had some problems, won't test

Running Build install

  Make had some problems, won't install

 

cpan[5]>

Build失敗:從前面的報錯信息可以看到,Build.PL在執行時找不到Module::Build::Tiny模塊(Can't locate Module/Build/Tiny.pm),所以需要先把構建模塊安裝好,執行命令install Build

cpan[5]> install Build

成功后,再執行install Redis

cpan[6]> install Redis

Redis安裝執行成功。

 

----------------------------------------------------------------------------------------------------------------

<版權所有,文章允許轉載,但必須以鏈接方式注明源地址,否則追究法律責任!>
原博客地址: http://blog.itpub.net/26230597/viewspace-1294684/
原作者:黃杉 (mchdba)

----------------------------------------------------------------------------------------------------------------


 

參考文檔:http://exchange.nagios.org/directory/Plugins/Databases/check_redis-2Epl/details

 本文由用戶 jopen 自行上傳分享,僅供網友學習交流。所有權歸原作者,若您的權利被侵害,請聯系管理員。
 轉載本站原創文章,請注明出處,并保留原始鏈接、圖片水印。
 本站是一個以用戶分享為主的開源技術平臺,歡迎各類分享!