Python 运用zabbix开发简易巡检工具

Python
516
0
0
2023-02-13
标签   Python实践

利用SSH或者Zabbix监控,配合Django开发框架,改造出属于自己的监控平台,实现包括主机图形,自动发现,计划任务,批量cmd执行,服务监控,日志监控等功能,由于公司机器混乱,基本上市面上的所有设备都能找到,监控这些不同型号不同系统的主机需要分别对待,我们就借助各种开源项目来做一个简单的巡检工具,方便工作需要。

安装Zabbix客户端

首先客户端还是需要安装zabbix-agent这个监控工具,该工具可以监控所有的平台,windows linux mac unix 等,利用该工具完成一整套自动化平台一点问题都没有,连ansible都不需要了。

[root@localhost ~]# wget http://repo.zabbix.com/zabbix/4.4/rhel/7/x86_64/zabbix-agent-4.4.3-1.el7.x86_64.rpm
[root@localhost ~]# rpm -ivh zabbix-agent-4.4.3-1.el7.x86_64.rpm

修改配置文件,并启动服务即可。

[root@localhost ~]# vim /etc/zabbix/zabbix_agentd.conf

PidFile=/var/run/zabbix/zabbix_agentd.pid
LogFile=/var/log/zabbix/zabbix_agentd.log
LogFileSize=0
Server=192.168.1.2
ServerActive=192.168.1.2
Hostname=centos1
Timeout=1
Include=/etc/zabbix/zabbix_agentd.d/*.conf
EnableRemoteCommands=1

[root@localhost ~]# systemctl restart zabbix-agent
[root@localhost ~]# systemctl enable zabbix-agent

服务端下载

https://www.zabbix.com/cn/download_agents

下载好之后将里面的 zabbix_get.exe 拖入项目中,直接调用就好。

img

基本的数据采集命令:

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.hostname
localhost.localdomain  // 此处才是主机名称

zabbix_get.exe -s 192.168.1.20 -p 10050 -k agent.hostname
centos1   // 此处是 zabbix_agentd.conf 中 Hostname 配置的值

zabbix_get.exe -s 192.168.1.20 -p 10050 -k agent.ping
1

zabbix_get.exe -s 192.168.1.20 -p 10050 -k agent.version
4.4.3      // zabbix 的version

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.boottime
1578564779    // 系统启动的时间戳

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.cpu.num
1              // 处理器个数

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.localtime
1578569019      // 当前系统时间戳

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.arch
x86_64         // 返回系统架构

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.os
Linux version 3.10.0-1062.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-36) (GCC) ) #1 SMP Wed Aug 7 18:08:02 UTC 2019   // 返回系统详细架构

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.packages[ssh]  // 已安装软件列表
[rpm] libssh2-1.8.0-3.el7.x86_64, openssh-7.4p1-21.el7.x86_64, openssh-clients-7.4p1-21.el7.x86_64, openssh-server-7.4p1-21.el7.x86_64

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.uname
Linux localhost.localdomain 3.10.0-1062.el7.x86_64 #1 SMP Wed Aug 7 18:08:02 UTC 2019 x86_64

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.uptime
4438   // 系统运行时长(秒);在监控项配置中使用 s 或 uptime 单位可得到可读的时长

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.users.num
0    // 登陆用户数

zabbix_get.exe -s 192.168.1.20 -p 10050 -k kernel.maxfiles
96437  // 系统支持最大的open files整数

zabbix_get.exe -s 192.168.1.20 -p 10050 -k kernel.maxproc
65536  // 系统支持最大进程数

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.hw.macaddr
[ens32] 00:50:56:22:6f:d3   // mac 地址列表

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.sw.os
Linux version 3.10.0-1062.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-36) (GCC) ) #1 SMP Wed Aug 7 18:08:02 UTC 2019   // 操作系统信息

允许远程执行命令:

[root@localhost ~]# vim /etc/zabbix/zabbix_agentd.conf
EnableRemoteCommands=1
[root@localhost ~]# systemctl restart zabbix-agent

D:\zabbix> zabbix_get.exe -s 192.168.1.20 -p 10050 -k "system.run[df -h]"
Filesystem               Size  Used Avail Use% Mounted on
devtmpfs                 480M     0  480M   0% /dev
tmpfs                    491M     0  491M   0% /dev/shm
tmpfs                    491M  7.4M  484M   2% /run
tmpfs                    491M     0  491M   0% /sys/fs/cgroup
/dev/mapper/centos-root   27G  1.7G   26G   7% /
/dev/sda1               1014M  136M  879M  14% /boot
tmpfs                     99M     0   99M   0% /run/user/0

监控CPU

system.cpu.util[<cpu>,<type>,<mode>]

-CPU:  CPU数量(默认是所有CPU)
-Type: 可用值,idle,nice,user,system,iowait,interrupt,softirq,steal
-Mode  可用值,avg1(一分钟负载),avg5,avg15

[root@localhost ~]# zabbix_get -s 192.168.1.25 -k system.cpu.util[,,avg1]
0.016692

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.cpu.num
1   // cpu 个数

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.cpu.util
0.016461  // cpu 利用率百分比

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.hw.cpu  // cpu 详细信息
processor 0: GenuineIntel Intel(R) Celeron(R) CPU        E3500  @ 2.70GHz working at 2700MHz

监控内存

proc.mem[<name>,<user>,<mode>,<cmdline>,<memtype>]

-name    进程名(默认所有进程)
-user    用户名(默认所有用户)
-mode    可选值,avg,max,min,sum
-cmdline 命令过滤

[root@localhost ~]# zabbix_get -s 192.168.1.25 -k proc.mem[httpd,,,]
1376378880
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k proc.mem[]
6479925248

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.swap.in
0      // Swap in (从磁盘换入内存) 的数量

zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.swap.out
0  // Swap out (从内存换出到磁盘) 的数量
zabbix_get.exe -s 192.168.1.20 -p 10050 -k system.swap.size
2147479552  // 交换分区大小字节或者百分比


mode - total (默认), active, anon, buffers, cached, exec, file, free, inactive, pinned, shared, wired, used, pused, available
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vm.memory.size[free]
598949888   // 监控内存变化

监控网卡

net.if.in [if,<mode>]       #网卡入口流量
net.if.out [if,<mode>]      #网卡出口流量
net.if.total [if,<mode>]    #网卡进/出流量总和

-if    网卡名称
-mode  可用值(如下)

       bytes    字节数
       packets  包数量
       errors   错误数量
       dropped  丢包数量

[root@localhost ~]# zabbix_get -s 192.168.1.25 -k net.if.in[ens32,bytes]
165983
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k net.if.in[ens32,packets]
2166
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k net.if.out[ens32,packets]
2067
[root@localhost ~]# zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.if.discovery
[{"{#IFNAME}":"ens32"},{"{#IFNAME}":"lo"}]  // 列出系统网卡信息
zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.if.total[ens32]
20785083           // 列出网卡总流量

zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.tcp.listen[10050]
1             // 检测指定端口是否开启

zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.tcp.port[,80]
0             // 检测本机web服务是否开启

zabbix_get.exe -s 192.168.1.20 -p 10050 -k net.tcp.service[ssh,,22]
1            // 检测指定服务是否在运行

监控IO/读写

vfs.dev.read [<device>,<type>,<mode>]    #磁盘读取
vfs.dev.write [<device>,<type>,<mode>]   #磁盘写入

-device   磁盘设备(默认all)
-type     sectors,operations,bytes,sps,ops,hps
-mode     默认有(avg1,avg5,avg15)

[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[/dev/sda,,avg1]
0.000000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[all]
195.200000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[all,,avg1]
195.200000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.read[all,,avg5]
177.758242
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.write[all,,avg5]
465.284483
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.write[all,,avg1]
877.200000
[root@localhost ~]# zabbix_get -s 192.168.1.25 -k vfs.dev.write[/dev/sda,,]
374.800000

文件操作:

zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.exists[/etc/passwd]
1   // 检测指定文件是否存在

zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.md5sum[/etc/passwd]
52f5db4f5688b79b9c07ef5a42ea29af  // md5验证

zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.size[/etc/passwd]
870   // 文件大小字节数

zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.file.time[/etc/passwd]
1578566854  // 文件日期时间戳

zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.fs.discovery  // 列出已挂载的文件系统及其类型(用于自动发现)
[{"{#FSNAME}":"/","{#FSTYPE}":"rootfs"},{"{#FSNAME}":"/sys","{#FSTYPE}":"sysfs"},{"{#FSNAME}":"/proc","{#FSTYPE}":"proc"},{"{#FSNAME}":"/dev","{#FSTYPE}":"devtmpfs"},{"{#FSNAME}":"/sys/kernel/security","{#FSTYPE}":"securityfs"},{"{#FSNAME}":"/dev/shm"}]

mode - total (默认), free, used, pfree (空闲百分比), pused (使用百分比)
zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.fs.inode[/dev,free]
122355 // /dev 的空闲 inode 数量

zabbix_get.exe -s 192.168.1.20 -p 10050 -k vfs.fs.size[/dev,free]
502525952  // 磁盘空间,返回本地文件系统的使用量字节

磁盘利用率

监控磁盘使用情况需要vfs.fs.size[fs,<mode>]键值

fs:文件系统
mode:模式
total默认全部,free空闲,used使用,pfree空闲百分比,pused使用百分比

名称:c盘总量
键值:vfs.fs.size[c:,total]


名称:c盘剩余量 键值vfs.fs.size[c:,free]
名称:c盘使用量 键值vfs.fs.size[c:,used]

名称:c盘剩余百分比 键值:vfs.fs.size[c:,pfree]
名称:c盘使用百分比 键值:vfs.fs.size[c:,pused]

先来完成一个Ping操作

import os,sqlite3,datetime,time,signal,sys
import subprocess

def Ping(address,port,timeout):
    """Probe a Zabbix agent with the ``agent.ping`` key.

    Runs ``get.exe -s <address> -p <port> -k agent.ping`` and waits at most
    ``timeout`` seconds for it to finish.

    Returns:
        1 when the helper prints ``1`` (the agent answered), otherwise 0.
        Timeouts, launch failures and any other output all yield 0.
    """
    # An argument list without a shell keeps address/port from being re-parsed
    # by a command interpreter and behaves the same on every platform.
    command = ["get.exe", "-s", str(address), "-p", str(port), "-k", "agent.ping"]
    try:
        # subprocess.run() kills and reaps the child when the timeout expires,
        # so unreachable hosts do not leave helper processes behind.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
    except Exception:
        # Best-effort probe: "could not run" is reported like "no answer".
        return 0
    # Only an explicit "1" counts as alive; an exited helper alone proves nothing.
    return 1 if result.stdout.strip() == b"1" else 0

# Probe 192.168.1.10 through 192.168.1.21 on port 10050 and print each result.
for host in ("192.168.1.{}".format(last_octet) for last_octet in range(10,22)):
    print(Ping(host,"10050",1))

封装connect方法 connect.py

img

import os,subprocess,sys,math
import time
import datetime

# Check whether the agent on a host is answering
def GetPing(addr,port,timeout):
    """Return 1 if the agent at ``addr:port`` answers ``agent.ping``, else 0.

    Runs ``engine.exe -s <addr> -p <port> -k agent.ping`` and waits at most
    ``timeout`` seconds. Timeouts, launch failures and any output other than
    ``1`` are all reported as 0.
    """
    # Argument list, no shell: values are passed verbatim to the helper.
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", "agent.ping"]
    try:
        # run() kills the child on timeout instead of leaving it running.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
    except Exception:
        return 0
    # The helper exiting is not enough; the agent must actually reply "1".
    return 1 if result.stdout.strip() == b"1" else 0

# Get the target host name
# NOTE(review): the original comment marked this as Windows-only; nothing in
# the code itself restricts it to Windows - confirm with the author.
def GetHostName(addr,port,timeout):
    """Return the value of ``system.hostname`` reported by the agent.

    Runs ``engine.exe -s <addr> -p <port> -k system.hostname`` and waits at
    most ``timeout`` seconds.

    Returns:
        The first whitespace-delimited token of the output as ``str``,
        or 0 on timeout, launch failure or empty output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", "system.hostname"]
    try:
        # The timeout is enforced while waiting for output; reading the pipe
        # first (as before) blocked until the child exited on its own.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return str(result.stdout.split()[0], "utf-8")
    except Exception:
        return 0

# Get the system type, e.g. Windows or Linux
def GetSysUname(addr,port,timeout):
    """Return the first word of the agent's ``system.uname`` value.

    Runs ``engine.exe -s <addr> -p <port> -k system.uname`` and waits at most
    ``timeout`` seconds.

    Returns:
        The first whitespace-delimited token of the output as ``str``,
        or 0 on timeout, launch failure or empty output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", "system.uname"]
    try:
        # run() applies the timeout to the whole exchange and kills the child
        # when it expires.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return str(result.stdout.split()[0], "utf-8")
    except Exception:
        return 0

# Report the system architecture (x86 or x64)
def GetSysArch(addr,port,timeout):
    """Return the first token of the agent's ``system.sw.arch`` value.

    Polls the helper once per second; gives up and returns 0 once the elapsed
    whole seconds exceed ``timeout``. Any exception also yields 0.
    """
    try:
        argv = "engine.exe -s {} -p {} -k system.sw.arch".format(addr, port).split(" ")
        began = datetime.datetime.now()
        child = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        while True:
            if child.poll() is not None:
                break
            time.sleep(1)
            elapsed = datetime.datetime.now() - began
            if elapsed.seconds > timeout:
                return 0
        first_line = child.stdout.readlines()[0]
        token = first_line.split()[0]
        return str(token,"utf-8")
    except Exception:
        return 0

# Get the number of online CPU cores
def GetCPUCoreNumber(addr,port,timeout):
    """Return the agent's ``system.cpu.num[online]`` value as ``str``.

    Runs ``engine.exe -s <addr> -p <port> -k system.cpu.num[online]`` and
    waits at most ``timeout`` seconds (the parameter was previously accepted
    but never used).

    Returns:
        The first token of the output as ``str``, or 0 on any failure.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", "system.cpu.num[online]"]
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return str(result.stdout.split()[0], "utf-8")
    except Exception:
        return 0

# Get the CPU utilisation as a whole-number percentage, e.g. 10
def GetCPUCoreInfo(addr,port,timeout):
    """Return the agent's ``system.cpu.util`` value rounded up to an int.

    Runs ``engine.exe -s <addr> -p <port> -k system.cpu.util`` and waits at
    most ``timeout`` seconds.

    Returns:
        ``math.ceil`` of the reported value, or 0 on timeout, launch failure
        or unparsable output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", "system.cpu.util"]
    try:
        # The timeout now bounds the wait for output; previously the pipe was
        # read to EOF before the timeout loop was ever entered.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return math.ceil(float(result.stdout.split()[0]))
    except Exception:
        return 0

# Get the CPU load value for avg1, avg5 or avg15
def GetCPULoadAvg(addr,port,avg,timeout):
    """Return the agent's ``system.cpu.load[,<avg>]`` value as ``float``.

    Args:
        addr, port: agent address and port handed to ``engine.exe``.
        avg: mode placed in the key (callers pass ``avg1``/``avg5``/``avg15``).
        timeout: maximum number of seconds to wait for the helper.

    Returns:
        The reported load as ``float``, or 0 on timeout, launch failure or
        unparsable output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "system.cpu.load[,{}]".format(avg)]
    try:
        # run() enforces the timeout and kills the child when it expires.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return float(result.stdout.split()[0])
    except Exception:
        return 0

# Get memory figures: returns total memory and free memory
def GetMemInfo(addr,port,timeout):
    """Return ``(total, free)`` memory, each divided by 1024*1024 and rounded up.

    Queries ``vm.memory.size[total]`` and ``vm.memory.size[free]`` through
    ``engine.exe``; each query waits at most ``timeout`` seconds (the
    parameter was previously accepted but never used).

    Returns:
        A ``(total, free)`` tuple of ints, or the scalar 0 when either query
        fails. The scalar failure value is kept for backward compatibility.
    """
    try:
        sizes = []
        for mode in ("total", "free"):
            command = ["engine.exe", "-s", str(addr), "-p", str(port),
                       "-k", "vm.memory.size[{}]".format(mode)]
            result = subprocess.run(command, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, timeout=timeout)
            sizes.append(math.ceil(int(result.stdout.split()[0])/1024/1024))
        return sizes[0],sizes[1]
    except Exception:
        return 0

# Get the free-space percentage of the "/" filesystem
def GetDiskInfo(self):
    """Return ``vfs.fs.size[/,pfree]`` for ``self.addr``/``self.port`` as float.

    ``self`` only needs ``addr`` and ``port`` attributes. Any failure
    (no output, unparsable value, launch error) yields 0.
    """
    try:
        query = "engine.exe -s {} -p {} -k vfs.fs.size[/,pfree]".format(self.addr, self.port)
        child = subprocess.Popen(query, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output_lines = child.stdout.readlines()
        first_token = output_lines[0].split()[0]
        return float(first_token)
    except Exception:
        return 0

# Check whether a given process is running
def GetProcessStatus(addr,port,procname):
    """Return 1 if the agent reports at least one ``procname`` process, else 0.

    Runs ``engine.exe -s <addr> -p <port> -k proc.num["<procname>"]``.
    The reported count is parsed as an integer, so hosts running several
    instances are reported as running, and the trailing line ending does not
    matter. Launch failures and empty or unparsable output yield 0.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "proc.num[\"{}\"]".format(procname)]
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        count = int(result.stdout.split()[0])
    except Exception:
        return 0
    return 1 if count > 0 else 0

# Check whether a port is listening
def GetNetworkPort(addr,port,check_port):
    """Return 1 if the agent reports ``net.tcp.listen[<check_port>]`` as 1, else 0.

    Runs ``engine.exe -s <addr> -p <port> -k net.tcp.listen[<check_port>]``.
    The output is compared after stripping whitespace, so both ``\\n`` and
    ``\\r\\n`` line endings are accepted. Launch failures and empty output
    yield 0 instead of raising.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "net.tcp.listen[{}]".format(check_port)]
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception:
        return 0
    return 1 if result.stdout.strip() == b"1" else 0

调用命令,获取系统基本信息。

import connect

def SysInfo():
    """Print one inspection row for every host in the built-in inventory.

    Each inventory entry is ``[address, role]``. For every host the helpers in
    ``connect`` are queried on port "10050" with a 2 second timeout and the
    results are printed as one tab-separated row under the header.
    Previously only the first inventory entry was ever inspected.
    """
    lis = [["127.0.0.1","MCP服务器"],["192.168.1.1","CTI"]]

    print("IP地址 \t\t 主机作用 \t\t 主机类型 \t\t 主机架构 \t\t 核心数 \t\t CPU利用率 \t\t CPU Avg1 \t Avg5 \t Avg15 \t 内存利用率 \t Ping")

    for host_address, host_user_type in lis:
        host_ping = connect.GetPing(host_address,"10050",2)
        host_type = connect.GetSysUname(host_address,"10050",2)
        host_arch = connect.GetSysArch(host_address,"10050",2)
        host_cpu_number = connect.GetCPUCoreNumber(host_address,"10050",2)
        host_cpu_core = connect.GetCPUCoreInfo(host_address,"10050",2)
        host_cpu_load1 = connect.GetCPULoadAvg(host_address,"10050","avg1",2)
        host_cpu_load5 = connect.GetCPULoadAvg(host_address,"10050","avg5",2)
        host_cpu_load15 = connect.GetCPULoadAvg(host_address,"10050","avg15",2)
        # Printed as returned by connect.GetMemInfo.
        host_memory = connect.GetMemInfo(host_address,"10050",2)

        print("{} \t {} \t\t {} \t\t {} \t\t\t {} \t\t\t {}% \t\t {} \t\t {} \t {} \t {} \t {}".
              format(host_address,host_user_type,host_type,host_arch,host_cpu_number,
                     host_cpu_core,host_cpu_load1,host_cpu_load5,host_cpu_load15,host_memory,host_ping))

if __name__ == '__main__':
    SysInfo()

img

批量ping检测

import subprocess, datetime, time
import threading,os,sys

lock = threading.RLock()

def GetPing(command, timeout):
    """Run a ready-made ``agent.ping`` command line and classify the reply.

    Args:
        command: full command line as one string; it is split on whitespace
            and executed without a shell.
        timeout: maximum number of seconds to wait for the helper.

    Returns:
        "正常" when the helper prints ``1`` (any line ending), otherwise
        "异常" - including on timeout or when the helper cannot be started.
    """
    try:
        # run() kills the child when the timeout expires, so a sweep over
        # many dead hosts does not pile up helper processes.
        result = subprocess.run(command.split(), stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
    except Exception:
        return "异常"
    if result.stdout.strip() == b"1":
        return "正常"
    return "异常"

def MyThread(ptr):
    """Worker: ping one inventory record and print its table row.

    Args:
        ptr: one line of the inventory file, a Python list literal whose
            first four items are printed; item 0 is the address to ping.
    """
    import ast
    # literal_eval accepts only Python literals, so a malformed or hostile
    # inventory line cannot execute code the way eval() could.
    each = ast.literal_eval(ptr.strip())
    command = "engine.exe -s {} -p 10050 -k agent.ping".format(each[0])
    ref = GetPing(command,1)
    # The context manager releases the lock even if formatting/printing raises.
    with lock:
        if(ref == "异常"):
            print("{0:15}\t\t {1:15}\t {2:10}\t\t {3:10}\t {4:4} <--".format(each[0], each[1], each[2], each[3], ref))
        else:
            print("{0:15}\t\t {1:15}\t {2:10}\t\t {3:10}\t {4:4}".format(each[0], each[1], each[2], each[3], ref))

if __name__ == "__main__":
    # base.db holds one record per line, e.g.
    # ["127.0.0.1","ANA2048567","1M-2F","MCP服务器"]
    # Read it once and close it; blank lines are skipped so that a trailing
    # newline in the file does not crash a worker thread.
    with open("base.db","r",encoding="utf-8") as fp:
        records = [line for line in fp if line.strip()]

    print("-" * 100)
    print("{0:13}\t\t {1:15}\t {2:10}\t\t {3:8}\t {4:4}".format("IP地址","SN号码","机房位置","作用","Ping"))
    print("-" * 100)
    # One worker thread per inventory record.
    for ptr in records:
        thread = threading.Thread(target=MyThread, args=(ptr,))
        thread.start()

img

批量进程检测

import subprocess, datetime, time
import threading,os,sys

# Get the number of running instances of a given process
def GetProcessStatus(addr,port,timeout,procname):
    """Return the process count the agent reports for ``procname``.

    Runs ``engine.exe -s <addr> -p <port> -k proc.num["<procname>"]`` and
    waits at most ``timeout`` seconds.

    Returns:
        The reported count as ``int``; 0 when nothing matches, on timeout,
        when the helper cannot be started, or when the output is unparsable.
    """
    # An argument list works on every platform; a single command string
    # without a shell is only accepted on Windows.
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "proc.num[\"{}\"]".format(procname)]
    try:
        # run() kills the child when the timeout expires.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return int(result.stdout.split()[0])
    except Exception:
        return 0

if __name__ == "__main__":
    import ast
    # process.db: one Python list literal per line; item 0 is the host
    # address and the remaining items are process names to check.
    # The file is read once and closed; blank lines are ignored.
    with open("process.db","r",encoding="utf-8") as proc_fp:
        # literal_eval parses the records without executing arbitrary code.
        hosts = [ast.literal_eval(line.strip()) for line in proc_fp if line.strip()]

    for proc in hosts:
        print("-" * 70)
        print("---> 巡检地址: {}".format(proc[0]))
        print("-" * 70)
        for name in proc[1:]:
            ref = GetProcessStatus(proc[0],10050,3,name)
            if(ref != 0):
                print("进程: {0:18} \t  进程数: {1:5} \t 状态: {2}".format(name,ref,"√"))
            else:
                print("进程: {0:18} \t  进程数: {1:5} \t 状态: {2}".format(name,ref,"×"))
        print()

img

批量CPU负载检测等

import subprocess, datetime, time,math
import threading,os,sys

lock = threading.RLock()

# Get the CPU utilisation as a percentage string
def GetCPUCoreInfo(addr,port,timeout):
    """Return the agent's ``system.cpu.util`` value as a string such as "13%".

    Runs ``engine.exe -s <addr> -p <port> -k system.cpu.util`` and waits at
    most ``timeout`` seconds. The value is rounded up to a whole number.

    Returns:
        "<n>%" on success, "0%" on timeout, launch failure or unparsable
        output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", "system.cpu.util"]
    try:
        # The timeout bounds the wait for output; previously the pipe was read
        # to EOF before the timeout loop was entered.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        CPU = math.ceil(float(result.stdout.split()[0]))
    except Exception:
        return str("0%")
    return str(CPU) + "%"

# Get the CPU load value for avg1, avg5 or avg15
def GetCPULoadAvg(addr,port,avg,timeout):
    """Return the agent's ``system.cpu.load[,<avg>]`` value as ``float``.

    Args:
        addr, port: agent address and port handed to ``engine.exe``.
        avg: mode placed in the key (callers pass ``avg1``/``avg5``/``avg15``).
        timeout: maximum number of seconds to wait for the helper.

    Returns:
        The reported load as ``float``, or 0 on timeout, launch failure or
        unparsable output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "system.cpu.load[,{}]".format(avg)]
    try:
        # run() enforces the timeout and kills the child when it expires.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return float(result.stdout.split()[0])
    except Exception:
        return 0

# Get memory usage as a percentage string
def GetMemInfo(addr,port,timeout):
    """Return the share of memory that is not free, as a string such as "50%".

    Queries ``vm.memory.size[total]`` and ``vm.memory.size[free]`` through
    ``engine.exe``; each query waits at most ``timeout`` seconds.

    Returns:
        "<n>%" where n = 100 - floor(free * 100 / total). Every failure path
        (timeout, launch error, unparsable output, total of zero) returns
        "0%"; previously one timeout path returned the int 0 instead.
    """
    try:
        readings = {}
        for mode in ("total", "free"):
            command = ["engine.exe", "-s", str(addr), "-p", str(port),
                       "-k", "vm.memory.size[{}]".format(mode)]
            result = subprocess.run(command, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, timeout=timeout)
            readings[mode] = int(result.stdout.split()[0])
        # Integer arithmetic on the raw values: no truncation of total/100,
        # which skewed the result and divided by zero for totals under 100 MB.
        percentage = 100 - readings["free"] * 100 // readings["total"]
        return str(percentage)+"%"
    except Exception:
        return str(0)+"%"

def MyThread(ptr):
    """Worker: collect CPU/memory/load figures for one host and print a row.

    Args:
        ptr: one line of the inventory file, a Python list literal whose
            first item is the host address.
    """
    import ast
    # literal_eval only accepts Python literals, so an inventory line cannot
    # execute code the way eval() could.
    address = ast.literal_eval(ptr.strip())
    host = address[0]
    cpu_info = GetCPUCoreInfo(host, 10050, 1)
    mem_info = GetMemInfo(host, 10050, 1)
    cpu_load1 = GetCPULoadAvg(host, 10050, "avg1", 1)
    cpu_load5 = GetCPULoadAvg(host, 10050, "avg5", 1)
    cpu_load15 = GetCPULoadAvg(host, 10050, "avg15", 1)
    # The context manager releases the lock even if formatting/printing raises.
    with lock:
        print("{0:10} \t {1:10} \t {2:10} \t {3:10} \t {4:10} \t {5:10} \t".
              format(host, cpu_info, mem_info,cpu_load1, cpu_load5,cpu_load15))

if __name__ == "__main__":
    # Read the inventory once and close the file; blank lines are skipped so
    # that a trailing newline does not crash a worker thread.
    with open("base.db","r",encoding="utf-8") as fp:
        records = [line for line in fp if line.strip()]
    print("-" * 100)
    print("IP地址 \t\t CPU利用率 \t 内存利用率 \t 1分钟负载 \t 5分钟负载 \t 15分钟负载 \t")
    print("-" * 100)

    # One worker thread per inventory record.
    for ptr in records:
        thread = threading.Thread(target=MyThread, args=(ptr,))
        thread.start()

img

封装一个Zabbix调用类:

import subprocess,datetime,time,math

class Engine():
    """Query one Zabbix agent through the bundled ``get.exe`` helper.

    Every query runs ``get.exe -s <address> -p <port> -k <key>``.

    Args:
        address: agent address passed to ``-s``.
        port: agent port passed to ``-p``.
        timeout: seconds to wait for each query (default 2).
    """
    def __init__(self,address,port,timeout=2):
        self.address = address
        self.port = port
        self.timeout = timeout

    def _Query(self,key):
        """Run one query; return the decoded stdout, or None on any failure."""
        # Argument list, no shell: keys containing spaces or quotes reach the
        # helper as a single argument.
        command = ["get.exe", "-s", str(self.address), "-p", str(self.port), "-k", key]
        try:
            # run() kills the child when the timeout expires.
            result = subprocess.run(command, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, timeout=self.timeout)
            return str(result.stdout, "utf-8")
        except Exception:
            return None

    @staticmethod
    def _AsFloat(value):
        """Convert a GetValue() result to float; unparsable values become 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0.0

    def GetValue(self,key):
        """Return the first whitespace-delimited token of the reply, or 0 on failure."""
        output = self._Query(key)
        tokens = output.split() if output else []
        if not tokens:
            return 0
        return tokens[0]

    # Basic information about the host
    def GetSystem(self):
        """Return a dict with Address, HostName, Uname and Ping."""
        ref_dict = { "Address" : 0 ,"HostName" : 0,"Uname":0, "Ping":0 }
        ref_dict["Address"] = self.address
        ref_dict["HostName"] = self.GetValue("system.hostname")
        ref_dict["Uname"] = self.GetValue("system.uname")
        ref_dict["Ping"] = self.GetValue("agent.ping")
        return ref_dict

    # CPU utilisation and load
    def GetCPU(self):
        """Return a dict with Address, Core, Active (rounded up) and Avg1/5/15."""
        ref_dict = { "Address": 0 ,"Core": 0,"Active":0 , "Avg1": 0 ,"Avg5":0 , "Avg15":0 }
        ref_dict["Address"] = self.address
        ref_dict["Core"] = self.GetValue("system.cpu.num")
        # A failed or non-numeric reply counts as 0 instead of raising.
        ref_dict["Active"] = math.ceil(self._AsFloat(self.GetValue("system.cpu.util")))
        ref_dict["Avg1"] = self.GetValue("system.cpu.load[,avg1]")
        ref_dict["Avg5"] = self.GetValue("system.cpu.load[,avg5]")
        ref_dict["Avg15"] = self.GetValue("system.cpu.load[,avg15]")
        return ref_dict

    # Memory utilisation
    def GetMemory(self):
        """Return a dict with Address, Total, Free and Percentage ("<n>%").

        Percentage is 100 - floor(Free * 100 / Total); it is "0%" when either
        query failed or Total is zero.
        """
        ref_dict = { "Address":0,"Total":0,"Free":0,"Percentage":0 }
        ref_dict["Address"] = self.address
        ref_dict["Total"] = self.GetValue("vm.memory.size[total]")
        ref_dict["Free"] = self.GetValue("vm.memory.size[free]")
        try:
            total = int(ref_dict["Total"])
            free = int(ref_dict["Free"])
            used = 100 - free * 100 // total
        except (TypeError, ValueError, ZeroDivisionError):
            # A failed query leaves 0 in the dict; report 0% instead of crashing.
            used = 0
        ref_dict["Percentage"] = str(used) + "%"
        return ref_dict

    # Disk data
    def GetDisk(self):
        """Return one dict per discovered filesystem: Address, Name, Type, Free.

        Free is the free-space percentage rounded up, as ``str``; it is the
        int 0 for filesystems of type "UNKNOWN" or when the size query does
        not return a number. Returns [] when discovery fails.
        """
        import json
        raw = self._Query("vfs.fs.discovery")
        try:
            # The discovery reply is JSON; parse it as data rather than eval().
            disk_ = json.loads(raw) if raw else []
        except ValueError:
            disk_ = []
        # NOTE(review): some agent versions appear to wrap the list as
        # {"data": [...]} - confirm against the deployed agents.
        if isinstance(disk_, dict):
            disk_ = disk_.get("data", [])
        ref_list = []
        for entry in disk_:
            dict_ = { "Address":0, "Name":0,"Type":0,"Free":0}
            dict_["Address"] = self.address
            dict_["Name"] = entry.get("{#FSNAME}")
            dict_["Type"] = entry.get("{#FSTYPE}")
            if dict_["Type"] != "UNKNOWN":
                pfree = self.GetValue("vfs.fs.size[\"{0}\",pfree]".format(dict_["Name"]))
                try:
                    dict_["Free"] = str(math.ceil(float(pfree)))
                except (TypeError, ValueError):
                    # e.g. an error reply for a filesystem with no size
                    dict_["Free"] = 0
            else:
                dict_["Free"] = 0
            ref_list.append(dict_)
        return ref_list

if __name__ == "__main__":
    # Demo: print the disk report of a single agent.
    print(Engine("132.35.93.2","10050").GetDisk())

简单的监控脚本编写 (无脑写法)

首先是连接脚本 win32_connect.py

import subprocess, datetime, time
import threading, os, sys,math

# Get the ping status of an agent
def GetPing(command, timeout):
    """Run a ready-made ``agent.ping`` command line and classify the reply.

    Args:
        command: full command line as one string; it is split on whitespace
            and executed without a shell.
        timeout: maximum number of seconds to wait for the helper.

    Returns:
        "正常" when the helper prints ``1`` (any line ending), otherwise
        "异常" - including on timeout or when the helper cannot be started.
    """
    try:
        # run() kills the child when the timeout expires instead of leaving
        # it running in the background.
        result = subprocess.run(command.split(), stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
    except Exception:
        return "异常"
    if result.stdout.strip() == b"1":
        return "正常"
    return "异常"

# Get the number of running instances of a given process
def GetProcessStatus(addr,port,timeout,procname):
    """Return the process count the agent reports for ``procname``.

    Runs ``engine.exe -s <addr> -p <port> -k proc.num["<procname>"]`` and
    waits at most ``timeout`` seconds.

    Returns:
        The reported count as ``int``; 0 when nothing matches, on timeout,
        when the helper cannot be started, or when the output is unparsable.
    """
    # An argument list works on every platform; a single command string
    # without a shell is only accepted on Windows.
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "proc.num[\"{}\"]".format(procname)]
    try:
        # run() kills the child when the timeout expires.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return int(result.stdout.split()[0])
    except Exception:
        return 0

# Get the CPU utilisation as a percentage string
def GetCPUCoreInfo(addr,port,timeout):
    """Return the agent's ``system.cpu.util`` value as a string such as "13%".

    Runs ``engine.exe -s <addr> -p <port> -k system.cpu.util`` and waits at
    most ``timeout`` seconds. The value is rounded up to a whole number.

    Returns:
        "<n>%" on success, "0%" on timeout, launch failure or unparsable
        output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port), "-k", "system.cpu.util"]
    try:
        # The timeout bounds the wait for output; previously the pipe was read
        # to EOF before the timeout loop was entered.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        CPU = math.ceil(float(result.stdout.split()[0]))
    except Exception:
        return str("0%")
    return str(CPU) + "%"

# Get the CPU load value for avg1, avg5 or avg15
def GetCPULoadAvg(addr,port,avg,timeout):
    """Return the agent's ``system.cpu.load[,<avg>]`` value as ``float``.

    Args:
        addr, port: agent address and port handed to ``engine.exe``.
        avg: mode placed in the key (callers pass ``avg1``/``avg5``/``avg15``).
        timeout: maximum number of seconds to wait for the helper.

    Returns:
        The reported load as ``float``, or 0 on timeout, launch failure or
        unparsable output.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "system.cpu.load[,{}]".format(avg)]
    try:
        # run() enforces the timeout and kills the child when it expires.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return float(result.stdout.split()[0])
    except Exception:
        return 0

# Get memory usage as a percentage string
def GetMemInfo(addr,port,timeout):
    """Return the share of memory that is not free, as a string such as "50%".

    Queries ``vm.memory.size[total]`` and ``vm.memory.size[free]`` through
    ``engine.exe``; each query waits at most ``timeout`` seconds.

    Returns:
        "<n>%" where n = 100 - floor(free * 100 / total). Every failure path
        (timeout, launch error, unparsable output, total of zero) returns
        "0%"; previously one timeout path returned the int 0 instead.
    """
    try:
        readings = {}
        for mode in ("total", "free"):
            command = ["engine.exe", "-s", str(addr), "-p", str(port),
                       "-k", "vm.memory.size[{}]".format(mode)]
            result = subprocess.run(command, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE, timeout=timeout)
            readings[mode] = int(result.stdout.split()[0])
        # Integer arithmetic on the raw values: no truncation of total/100,
        # which skewed the result and divided by zero for totals under 100 MB.
        percentage = 100 - readings["free"] * 100 // readings["total"]
        return str(percentage)+"%"
    except Exception:
        return str(0)+"%"

# Get disk usage: pfree_disk is the free percentage, ptotal_disk the used percentage
def GetDisk(addr,port,timeout,diskname):
    """Return ``(pfree_disk, ptotal_disk)`` for one filesystem.

    Runs ``engine.exe -s <addr> -p <port> -k vfs.fs.size["<diskname>",pfree]``
    and waits at most ``timeout`` seconds.

    Returns:
        ``pfree_disk`` is the reported free percentage rounded up and
        ``ptotal_disk`` is ``100 - pfree_disk`` (despite its name, the used
        share). Every failure path returns ``(0, 0)``; previously a timeout
        returned the scalar 0 and a helper that finished before the first
        poll made the function return None.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "vfs.fs.size[\"{}\",pfree]".format(diskname)]
    try:
        # run() waits for the helper (bounded by timeout) and then hands back
        # its output, so the result no longer depends on polling timing.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        pfree_disk = math.ceil(float(result.stdout.split()[0]))
    except Exception:
        return 0,0
    ptotal_disk = math.ceil(100 - pfree_disk)
    return pfree_disk,ptotal_disk

# Check whether a port is listening
def GetListenPort(addr,port,timeout,check_port):
    """Return the agent's ``net.tcp.listen[<check_port>]`` value as ``int``.

    Runs ``engine.exe -s <addr> -p <port> -k net.tcp.listen[<check_port>]``
    and waits at most ``timeout`` seconds.

    Returns:
        The reported integer, or 0 on timeout, launch failure or unparsable
        output. Previously the output was only read inside the polling loop,
        so a helper that had already finished always produced 0.
    """
    command = ["engine.exe", "-s", str(addr), "-p", str(port),
               "-k", "net.tcp.listen[{}]".format(check_port)]
    try:
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return int(result.stdout.split()[0])
    except Exception:
        return 0

# Check a web server: the agent at Local_Addr probes check_addr:check_port
def CheckWebServerStatus(Local_Addr,port,timeout,check_addr,check_port):
    """Ask the agent at ``Local_Addr`` whether ``check_addr:check_port`` accepts TCP.

    The agent is pinged first (1 second budget) on the same ``port`` that is
    used for the query; an unreachable agent yields 0 without a second call.
    Then ``net.tcp.port["<check_addr>",<check_port>]`` is queried with at
    most ``timeout`` seconds.

    Returns:
        The reported integer, or 0 when the agent is unreachable, on
        timeout, launch failure or unparsable output.
    """
    # Ping the port we are about to query instead of a hard-coded 10050.
    test_ping = "engine.exe -s {} -p {} -k agent.ping".format(Local_Addr, port)
    if GetPing(test_ping, 1) == "异常":
        return 0
    command = ["engine.exe", "-s", str(Local_Addr), "-p", str(port),
               "-k", "net.tcp.port[\"{}\",{}]".format(check_addr,check_port)]
    try:
        # The reply is read after the helper finished, so a fast helper is no
        # longer misreported as 0.
        result = subprocess.run(command, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE, timeout=timeout)
        return int(result.stdout.split()[0])
    except Exception:
        return 0

接着是win32_core.py

import win32_connect

def MyPing():
    """Ping every host listed in ``win32_base.db`` and print a status table.

    Each non-blank line of the file is a Python list literal with five items,
    printed in the order: item 0 (address, also the ping target), item 1,
    item 2, item 3, the ping result, item 4.
    """
    import ast
    # A single handle, closed automatically even if a record is malformed.
    with open("win32_base.db", "r", encoding="utf-8") as fp:
        print("-" * 100)
        print("{0:20} \t {1:10} \t {2:13} \t {3:5} \t {4:9} \t {5:40}".format("IP地址","机器系统","设备SN","机房位置","存活状态","主机作用"))
        print("-" * 100)
        for line in fp:
            item = line.strip()
            if not item:
                # Skip blank lines instead of failing to parse them.
                continue
            # literal_eval parses the record without executing code.
            eval_list = ast.literal_eval(item)
            command = "engine.exe -s {} -p 10050 -k agent.ping".format(eval_list[0])
            ref = win32_connect.GetPing(command, 1)
            print("{0:20} \t {1:15} \t {2:13} \t {3:10} \t {4:5} \t {5:40}".
                  format(eval_list[0],eval_list[1],eval_list[2],eval_list[3],ref,eval_list[4]))

# Report the running state of the processes expected on each host
def MyProcessCheck():
    """Print, per host in ``win32_process.db``, the count of each listed process.

    Each non-blank line of the file is a Python list literal: item 0 is the
    host address, the remaining items are process names. Every process is
    queried on port 10050 with a 3 second timeout.
    """
    import ast
    # A single handle, closed automatically even if a record is malformed.
    with open("win32_process.db", "r", encoding="utf-8") as proc_fp:
        for line in proc_fp:
            if not line.strip():
                # Skip blank lines instead of failing to parse them.
                continue
            # literal_eval parses the record without executing code.
            proc = ast.literal_eval(line.strip())
            print("-" * 70)
            print("---> 巡检地址: {}".format(proc[0]))
            print("-" * 70)
            for name in proc[1:]:
                ref = win32_connect.GetProcessStatus(proc[0], 10050, 3, name)
                if (ref != 0):
                    print("进程: {0:18} \t  进程数: {1:5} \t 状态: {2}".format(name, ref, "√"))
                else:
                    print("进程: {0:18} \t  进程数: {1:5} \t 状态: {2}".format(name, ref, "×"))
            print()

# Report CPU, memory and load averages for each host
def GetLoadAvg():
    """Print a load table for every host listed in win32_base.db.

    Each non-blank line is a Python list literal [ip, os, serial, location, role].
    Hosts whose ping result is not "正常" are printed with "-1" in every
    metric column.
    """
    # One template for both branches; the offline branch used to repeat
    # field 7 (location) where field 8 (role) belongs.
    row = "{0:10} \t {1:15} \t {2:4} \t {3:4} \t {4:7} \t {5:7} \t {6:7} \t\t {7:10} \t {8:30}"
    # Single handle, closed automatically (the original leaked a second one).
    with open("win32_base.db", "r", encoding="utf-8") as fp:
        print("-" * 120)
        print("IP地址 \t\t\t 系统类型 \t\t CPU利用率 \t 内存利用率 \t 1分钟负载 \t 5分钟负载 \t 15分钟负载 \t 主机位置 \t\t 主机作用")
        print("-" * 120)
        for line in fp:
            if not line.strip():
                # Skip blank lines rather than failing inside eval().
                continue
            # NOTE(review): eval() executes the file content; the file must be trusted.
            ptr = eval(line)
            command = "engine.exe -s {} -p 10050 -k agent.ping".format(ptr[0])
            flag = win32_connect.GetPing(command, 1)
            if flag == "正常":
                cpu_info = win32_connect.GetCPUCoreInfo(ptr[0], 10050, 1)
                mem_info = win32_connect.GetMemInfo(ptr[0], 10050, 1)
                cpu_load1 = win32_connect.GetCPULoadAvg(ptr[0], 10050, "avg1", 1)
                cpu_load5 = win32_connect.GetCPULoadAvg(ptr[0], 10050, "avg5", 1)
                cpu_load15 = win32_connect.GetCPULoadAvg(ptr[0], 10050, "avg15", 1)
                print(row.format(ptr[0],ptr[1],cpu_info,mem_info,cpu_load1,cpu_load5,cpu_load15,ptr[3],ptr[4]))
            else:
                print(row.format(ptr[0],ptr[1],"-1","-1","-1","-1","-1",ptr[3],ptr[4]))

# Report disk usage per partition
def GetFDisk():
    """Print disk figures for each partition of each host in win32_disk.db.

    Each non-blank line is a Python list literal: [ip, partition, ...].
    Partitions are only queried when the host's ping result is "正常".
    """
    # Single handle, closed automatically (the original leaked a second one).
    with open("win32_disk.db", "r", encoding="utf-8") as fp:
        for line in fp:
            item = line.strip()
            if not item:
                # Skip blank lines rather than failing inside eval().
                continue
            # NOTE(review): eval() executes the file content; the file must be trusted.
            eval_list = eval(item)
            print()
            print("-" * 80)
            print("检测主机: {}".format(eval_list[0]))
            print("-" * 80)
            partitions = eval_list[1:]
            if not partitions:
                continue
            # The ping does not depend on the partition, so do it once per host
            # instead of once per partition.
            command = "engine.exe -s {} -p 10050 -k agent.ping".format(eval_list[0])
            if win32_connect.GetPing(command, 1) != "正常":
                continue
            for partition in partitions:
                pfree,ptotal = win32_connect.GetDisk(eval_list[0], 10050, 1, partition)
                print("---> 磁盘分区: {0:10} \t 剩余空间: {1:5} \t 已用空间: {2:5}".format(partition,str(pfree)+"%",str(ptotal)+"%"))

# Report whether each web service is reachable
def CheckWebServer(local_addr="132.35.93.2"):
    """Print a success/failure line for every service listed in WebServer.db.

    Each non-blank line is a Python list literal: [host, port, service_name].

    Parameters:
        local_addr: address of the Zabbix agent that performs the probe.
                    Defaults to the address that used to be hard-coded here.
    """
    # Single handle, closed automatically (the original leaked a second one).
    with open("WebServer.db", "r", encoding="utf-8") as fp:
        for line in fp:
            entry = line.strip()
            if not entry:
                # Skip blank lines rather than failing inside eval().
                continue
            # NOTE(review): eval() executes the file content; the file must be trusted.
            fp_list = eval(entry)
            ref = win32_connect.CheckWebServerStatus(local_addr, 10050, 1, fp_list[0], fp_list[1])
            if ref == 1:
                print("[成功] -> Web主机: {0:20} \t 检测端口: {1:5} \t 业务名称: {2:20}".format(fp_list[0],fp_list[1],fp_list[2]))
            else:
                print("*失败* -> Web主机: {0:20} \t 检测端口: {1:5} \t 业务名称: {2:20}".format(fp_list[0],fp_list[1],fp_list[2]))

最后的win32_engine.py

import os,subprocess,sys,math
import time
import datetime

# Check whether the agent on a host is alive
def GetPing(addr,port,timeout):
    """Return 1 if the Zabbix agent at addr:port answers agent.ping, else 0.

    Parameters:
        addr:    agent address.
        port:    agent port.
        timeout: seconds to wait for engine.exe before giving up.

    The agent answers agent.ping with "1". Any other output, a timeout, or a
    failure to launch engine.exe yields 0.
    """
    # Pass one string: with shell=True a list argv hands only its first
    # element to the shell on POSIX, dropping every option.
    command = "engine.exe -s {} -p {} -k agent.ping".format(addr, port)
    process = None
    try:
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, _ = process.communicate(timeout=timeout)
    except subprocess.TimeoutExpired:
        # Do not leave the helper process running after giving up on it.
        process.kill()
        process.wait()
        return 0
    except Exception:
        return 0
    # Exiting in time is not enough: engine.exe also exits promptly on errors.
    return 1 if output.strip() == b"1" else 0

# Get the target host name (Windows only)
def GetHostName(addr,port,timeout):
    """Return the system.hostname value reported by the agent at addr:port.

    Parameters:
        addr:    agent address.
        port:    agent port.
        timeout: seconds to wait for engine.exe before giving up.

    Returns:
        The first whitespace-separated token of the first output line as a
        str, or 0 on timeout, empty output, or any other failure.
    """
    Command = "engine.exe -s {} -p {} -k system.hostname".format(addr,port)
    process = None
    try:
        process = subprocess.Popen(Command, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
        # communicate() enforces the timeout; the old code read stdout first,
        # which blocked until exit and made its timeout loop dead code.
        output, _ = process.communicate(timeout=timeout)
        return str(output.splitlines()[0].split()[0],"utf-8")
    except subprocess.TimeoutExpired:
        # Do not leave the helper process running after giving up on it.
        process.kill()
        process.wait()
        return 0
    except Exception:
        return 0

# Get the system type, e.g. Windows or Linux
def GetSysUname(addr,port,timeout):
    """Return the first token of the system.uname value reported by the agent.

    Parameters:
        addr:    agent address.
        port:    agent port.
        timeout: seconds to wait for engine.exe before giving up.

    Returns:
        The first whitespace-separated token of the first output line as a
        str, or 0 on timeout, empty output, or any other failure.
    """
    Command = "engine.exe -s {} -p {} -k system.uname".format(addr,port)
    process = None
    try:
        process = subprocess.Popen(Command, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
        # communicate() enforces the timeout; the old code read stdout first,
        # which blocked until exit and made its timeout loop dead code.
        output, _ = process.communicate(timeout=timeout)
        return str(output.splitlines()[0].split()[0],"utf-8")
    except subprocess.TimeoutExpired:
        # Do not leave the helper process running after giving up on it.
        process.kill()
        process.wait()
        return 0
    except Exception:
        return 0

# Get the platform architecture, e.g. x86 or x64
def GetSysArch(addr,port,timeout):
    """Return the system.sw.arch value reported by the agent at addr:port.

    Parameters:
        addr:    agent address.
        port:    agent port.
        timeout: seconds to wait for engine.exe before giving up.

    Returns:
        The first whitespace-separated token of the first output line as a
        str, or 0 on timeout, empty output, or any other failure.
    """
    # Pass one string: with shell=True a list argv hands only its first
    # element to the shell on POSIX, dropping every option.
    command = "engine.exe -s {} -p {} -k system.sw.arch".format(addr, port)
    process = None
    try:
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, _ = process.communicate(timeout=timeout)
        return str(output.splitlines()[0].split()[0],"utf-8")
    except subprocess.TimeoutExpired:
        # Do not leave the helper process running after giving up on it.
        process.kill()
        process.wait()
        return 0
    except Exception:
        return 0

# Get the number of online CPU cores
def GetCPUCoreNumber(addr,port,timeout):
    """Return the system.cpu.num[online] value reported by the agent.

    Parameters:
        addr:    agent address.
        port:    agent port.
        timeout: seconds to wait for engine.exe before giving up (previously
                 accepted but ignored).

    Returns:
        The first token of the first output line as a str, or 0 on timeout,
        empty output, or any other failure.
    """
    Command = "engine.exe -s {} -p {} -k system.cpu.num[online]".format(addr,port)
    process = None
    try:
        process = subprocess.Popen(Command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, _ = process.communicate(timeout=timeout)
        return str(output.splitlines()[0].split()[0],"utf-8")
    except subprocess.TimeoutExpired:
        # Do not leave the helper process running after giving up on it.
        process.kill()
        process.wait()
        return 0
    except Exception:
        return 0

# Get CPU utilisation, e.g. 10 (%)
def GetCPUCoreInfo(addr,port,timeout):
    """Return the system.cpu.util value from the agent, rounded up to an int.

    Parameters:
        addr:    agent address.
        port:    agent port.
        timeout: seconds to wait for engine.exe before giving up.

    Returns:
        math.ceil() of the first token of the first output line, or 0 on
        timeout, unparsable output, or any other failure.
    """
    Command = "engine.exe -s {} -p {} -k system.cpu.util".format(addr, port)
    process = None
    try:
        process = subprocess.Popen(Command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() enforces the timeout; the old code read stdout first,
        # which blocked until exit and made its timeout loop dead code.
        output, _ = process.communicate(timeout=timeout)
        return math.ceil(float(output.splitlines()[0].split()[0]))
    except subprocess.TimeoutExpired:
        # Do not leave the helper process running after giving up on it.
        process.kill()
        process.wait()
        return 0
    except Exception:
        return 0

# Get the CPU load average: avg1, avg5 or avg15
def GetCPULoadAvg(addr,port,avg,timeout):
    """Return the system.cpu.load[,avg] value reported by the agent as a float.

    Parameters:
        addr:    agent address.
        port:    agent port.
        avg:     averaging mode placed in the item key ("avg1", "avg5", "avg15").
        timeout: seconds to wait for engine.exe before giving up.

    Returns:
        float of the first token of the first output line, or 0 on timeout,
        unparsable output, or any other failure.
    """
    Command = "engine.exe -s {} -p {} -k system.cpu.load[,{}]".format(addr, port,avg)
    process = None
    try:
        process = subprocess.Popen(Command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() enforces the timeout; the old code read stdout first,
        # which blocked until exit and made its timeout loop dead code.
        output, _ = process.communicate(timeout=timeout)
        return float(output.splitlines()[0].split()[0])
    except subprocess.TimeoutExpired:
        # Do not leave the helper process running after giving up on it.
        process.kill()
        process.wait()
        return 0
    except Exception:
        return 0

# Get memory figures: returns (total, free)
def GetMemInfo(addr,port,timeout):
    """Return (total, free) memory reported by the agent at addr:port.

    Both vm.memory.size values are divided by 1024*1024 and rounded up
    (MiB, assuming the agent reports bytes).

    Parameters:
        addr:    agent address.
        port:    agent port.
        timeout: seconds to wait for each engine.exe query (previously
                 accepted but ignored).

    Returns:
        A (total, free) tuple of ints, or 0 on timeout or any other failure.
    """
    procs = []
    try:
        # Start both queries first so they run concurrently, as before.
        for mode in ("total", "free"):
            procs.append(subprocess.Popen(
                "engine.exe -s {} -p {} -k vm.memory.size[{}]".format(addr, port, mode),
                shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
        sizes = []
        for proc in procs:
            output, _ = proc.communicate(timeout=timeout)
            sizes.append(math.ceil(int(output.splitlines()[0].split()[0])/1024/1024))
        return sizes[0], sizes[1]
    except Exception:
        # Covers TimeoutExpired too: stop whatever is still running.
        for proc in procs:
            if proc.poll() is None:
                proc.kill()
                proc.wait()
        return 0

# Get the free-space percentage of the root filesystem
def GetDiskInfo(self):
    """Return the vfs.fs.size[/,pfree] value from the agent as a float.

    `self` is any object exposing `addr` and `port` attributes. Returns 0 on
    any failure, including missing attributes or unparsable output.
    """
    try:
        query = "engine.exe -s {} -p {} -k vfs.fs.size[/,pfree]".format(self.addr, self.port)
        child = subprocess.Popen(query, shell=True,
                                 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        first_line = child.stdout.readlines()[0]
        # Only the first whitespace-separated token carries the number.
        return float(first_line.split()[0])
    except Exception:
        return 0



# Check whether a given process is running
def GetProcessStatus(addr,port,procname):
    """Return 1 if the agent reports at least one process named procname, else 0.

    Parameters:
        addr:     agent address.
        port:     agent port.
        procname: process name placed in the proc.num[...] item key.

    proc.num yields a process count, so any positive count means "running".
    The old comparison against b"1\\r\\n" reported 0 for two or more processes
    and for LF-terminated output. Empty or unparsable output now yields 0
    instead of raising.
    """
    command = "engine.exe -s {} -p {} -k proc.num[\"{}\"]".format(addr,port,procname)
    try:
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, _ = process.communicate()
        return 1 if int(output.split()[0]) > 0 else 0
    except Exception:
        return 0

# Check whether a port is open
def GetNetworkPort(addr,port,check_port):
    """Return 1 if the agent reports check_port as listening, else 0.

    Parameters:
        addr:       agent address.
        port:       agent port.
        check_port: TCP port placed in the net.tcp.listen[...] item key.

    The answer is compared after stripping whitespace, so both CRLF and LF
    line endings are accepted. Empty output now yields 0 instead of raising
    IndexError.
    """
    command = "engine.exe -s {} -p {} -k net.tcp.listen[{}]".format(addr,port,check_port)
    try:
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        output, _ = process.communicate()
    except Exception:
        return 0
    return 1 if output.strip() == b"1" else 0

监控unix系列,Unix_core.py

import paramiko

# Module-level SSH client; BatchCMD reconnects this same object for every host.
ssh = paramiko.SSHClient()
# AutoAddPolicy: hosts with an unknown key are accepted instead of rejected.
# NOTE(review): this skips host-key verification; confirm it is acceptable here.
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

# Run one command on a remote host over SSH
def BatchCMD(address,username,password,port,command):
    """Run `command` on `address` with the module-level ssh client.

    Parameters:
        address:  host to connect to.
        username: login user.
        password: login password.
        port:     SSH port.
        command:  shell command to execute remotely.

    Returns:
        The command's stdout as bytes, or -1 when stdout is empty or when
        connecting or executing fails.
    """
    try:
        ssh.connect(hostname=address,username=username,password=password,port=port,timeout=2)
        stdin , stdout , stderr = ssh.exec_command(command)
        result = stdout.read()
        if len(result) != 0:
            return result
        else:
            return -1
    except Exception:
        return -1
    finally:
        # The client is shared and reconnected per host; close it so sessions
        # do not pile up across calls.
        ssh.close()

# Check host liveness by running a trivial command over SSH
def GetPing():
    """Print a liveness table for every host listed in unix_base.db.

    Each non-blank line is a Python list literal laid out as
    [ip, os, serial_number, location, role, username, password].
    A host is "正常" when the remote command prints 0, otherwise "异常".
    """
    # One template for header and rows keeps the columns aligned; the "正常"
    # row previously used width 11 for the serial-number column.
    row = "{0:20} \t {1:10} \t {2:13} \t {3:5} \t {4:9} \t {5:40}"
    # Single handle, closed automatically (the original leaked a second one).
    with open("unix_base.db", "r", encoding="utf-8") as fp:
        print("-" * 100)
        print(row.format("IP地址","机器系统","设备SN","机房位置","存活状态","主机作用"))
        print("-" * 100)
        for line in fp:
            if not line.strip():
                # Skip blank lines rather than failing inside eval().
                continue
            # NOTE(review): eval() executes the file content; the file must be trusted.
            ref = eval(line)
            ret = BatchCMD(ref[0],ref[5],ref[6],22,"pwd | echo $?")
            try:
                alive = int(ret) == 0
            except (TypeError, ValueError):
                # Non-numeric output (e.g. a login banner) counts as a failed
                # check instead of aborting the whole report.
                alive = False
            print(row.format(ref[0],ref[1],ref[2],ref[3],"正常" if alive else "异常",ref[4]))

# Remote command shape: ps aux | grep "usbCfgDev" | grep -v "grep" | awk {'print $2'}
def GetProcessStatus():
    """Print the PID of each monitored process for every host in unix_process.db.

    Each non-blank line is a Python list literal laid out as
    [ip, username, password, process_name, ...]. A process whose lookup
    returns nothing usable is printed with PID 0 and "×".
    """
    # Single handle, closed automatically (the original leaked a second one).
    with open("unix_process.db", "r", encoding="utf-8") as fp:
        for line in fp:
            if not line.strip():
                # Skip blank lines rather than failing inside eval().
                continue
            # NOTE(review): eval() executes the file content; the file must be trusted.
            proc = eval(line)
            print("-" * 70)
            # NOTE(review): this header prints the login password in clear text.
            print("---> 巡检地址: {0:10} \t 登录用户: {1:7} \t 登录密码: {2:10}".format(proc[0],proc[1],proc[2]))
            print("-" * 70)
            for name in proc[3:]:
                command = "ps aux | grep \'{}\' | grep -v \'grep\' | awk '{}' | head -1".format(name,"{print $2}")
                try:
                    # BatchCMD yields bytes such as b"1234\n", or -1 on failure.
                    pid = int(BatchCMD(proc[0],proc[1],proc[2],22,command))
                except Exception:
                    pid = -1
                if pid != -1:
                    print("进程: {0:18}  \t PID: {1:10} \t 状态: {2}".format(name, pid,"√"))
                else:
                    print("进程: {0:18} \t  PID:{1:10} \t 状态: {2}".format(name, 0,"×"))
            print()


def GetDiskStatus():
    """Print the `df` output of every host listed in unix_disk.db.

    Each non-blank line is a Python list literal laid out as
    [ip, system, username, password].
    """
    # Single handle, closed automatically (the original leaked a second one).
    with open("unix_disk.db", "r", encoding="utf-8") as fp:
        for line in fp:
            if not line.strip():
                # Skip blank lines rather than failing inside eval().
                continue
            # NOTE(review): eval() executes the file content; the file must be trusted.
            proc = eval(line)
            print("-" * 100)
            # NOTE(review): this header prints the login password in clear text.
            print("---> 巡检地址: {0:10} \t 登录系统: {1:7} \t 登录账号: {2:10} 登录密码: {3:10}".
                  format(proc[0],proc[1],proc[2],proc[3]))
            print("-" * 100)
            try:
                ref = BatchCMD(proc[0], proc[2], proc[3], 22, "df | grep -v 'Filesystem'")
                if isinstance(ref, bytes):
                    # Decode properly; the old str(bytes) rewrite dropped every
                    # apostrophe and left escapes such as \t as literal text.
                    print(ref.decode("utf-8", errors="replace"))
                else:
                    # BatchCMD signals failure with -1; show it as before.
                    print(ref)
            except Exception as err:
                # Report the failure instead of hiding it, then keep going.
                print("[error] {}: {}".format(proc[0], err))
            print()

# Run a command on every host of a given system type
def RunCmd(command,system):
    """Run `command` over SSH on each unix_disk.db host whose system matches.

    Each non-blank line is a Python list literal laid out as
    [ip, system, username, password].

    Parameters:
        command: shell command to execute remotely.
        system:  only hosts whose second field equals this value are used.
    """
    # Single handle, closed automatically (the original leaked a second one).
    with open("unix_disk.db", "r", encoding="utf-8") as fp:
        for line in fp:
            if not line.strip():
                # Skip blank lines rather than failing inside eval().
                continue
            # NOTE(review): eval() executes the file content; the file must be trusted.
            proc = eval(line)
            if proc[1] != system:
                continue
            print("-" * 100)
            # NOTE(review): this header prints the login password in clear text.
            print("---> 巡检地址: {0:10} \t 登录系统: {1:7} \t 登录账号: {2:10} 登录密码: {3:10}".
                  format(proc[0],proc[1],proc[2],proc[3]))
            print("-" * 100)
            try:
                ref = BatchCMD(proc[0], proc[2], proc[3], 22, command)
                if isinstance(ref, bytes):
                    # Decode properly; the old str(bytes) rewrite dropped every
                    # apostrophe and left escapes such as \t as literal text.
                    print(ref.decode("utf-8", errors="replace"))
                else:
                    # BatchCMD signals failure with -1; show it as before.
                    print(ref)
            except Exception as err:
                # Report the failure instead of hiding it, then keep going.
                print("[error] {}: {}".format(proc[0], err))

配置文件

unix_base.db
["127.0.0.1","Suse","CN11111","C-F-04","国漫 CTI3.6","root","1111111"]

unix_disk.db
["127.0.0.1","Suse","root","123123"]

unix_process.db
["127.0.0.1","root","123123","oracle","mysqld"]

WebServer.db
["127.0.0.1",8005,"跨省投诉-在线客服跨省协办"]

win32_base.db
["127.0.0.1","Windows XP","0000000","0-0-0","本地跳板机"]

win32_disk.db
["127.0.0.1","c:","d:"]

win32_port.db
["127.0.0.1","80","3421","8080"]

win32_process.db
["127.0.0.1","ilmt_tray.exe","awhost32.exe"]

比较好的改进写法

上方代码中每个表都要单独保存一份账号密码,维护起来很麻烦。第二次重写时,我改用一个UUID序号来定位用户名、密码等登录信息,并把所有配置项集中到一个配置文件中,由程序按项目分别解析,从而实现用一个配置文件管理全部配置项。这样只有基础的base结构存储账号密码,每个base结构对应一个UUID号;当添加进程等其他表时,程序会根据UUID号自动解析出对应的账号密码,因此密码只需写一份,其余各表通过UUID与之关联。

使用Django开发图形化界面

收集目标主机数据的类:

import os,subprocess,sys,math

class GetSysInfo(object):
    """Collect basic metrics from one host by running get.exe against its agent.

    Every getter returns 0 when the query fails or its output cannot be parsed.
    """
    def __init__(self,addr,port):
        self.addr = addr
        self.port = port

    def _first_token(self, key):
        """Run get.exe for one item key; return the first token of its first output line (bytes)."""
        Command = "get.exe -s {} -p {} -k {}".format(self.addr, self.port, key)
        proc = subprocess.Popen(Command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # communicate() drains both pipes and waits for the child, so no
        # unreaped process or open pipe is left behind.
        output, _ = proc.communicate()
        return output.splitlines()[0].split()[0]

    def GetHostName(self):
        """Return system.hostname as a str."""
        try:
            # Decode the bytes; str(bytes) used to yield the repr "b'name'".
            return self._first_token("system.hostname").decode("utf-8")
        except Exception:
            return 0

    def GetCPUInfo(self):
        """Return system.cpu.util rounded up to an int."""
        try:
            return math.ceil(float(self._first_token("system.cpu.util")))
        except Exception:
            return 0

    def GetMemInfo(self):
        """Return (total, free) memory, each divided by 1024*1024 and rounded up."""
        try:
            Total = math.ceil(int(self._first_token("vm.memory.size[total]"))/1024/1024)
            Free = math.ceil(int(self._first_token("vm.memory.size[free]"))/1024/1024)
            return Total,Free
        except Exception:
            return 0

    def GetDiskInfo(self):
        """Return vfs.fs.size[/,pfree] as a float."""
        try:
            return float(self._first_token("vfs.fs.size[/,pfree]"))
        except Exception:
            return 0

    def GetNetInfo(self, iface="ens32"):
        """Return (bytes_in, bytes_out) counters for one network interface.

        iface defaults to "ens32", the interface name that used to be hard-coded.
        """
        try:
            InModule = int(self._first_token("net.if.in[{},bytes]".format(iface)))
            OutModule = int(self._first_token("net.if.out[{},bytes]".format(iface)))
            return InModule,OutModule
        except Exception:
            return 0


# Demo: query the agent at 192.168.1.20:10050 for NIC counters and print them.
# NOTE: these statements run whenever the module is loaded and launch get.exe.
a = GetSysInfo("192.168.1.20","10050")
b = a.GetNetInfo()
print(b)

admin.py

from django.contrib import admin
from MyWeb.models import *

@admin.register(HostInfo)
class MyAdmin(admin.ModelAdmin):
    # Columns shown for HostInfo in the admin list view, in display order.
    list_display = (
        "HostAddr",
        "HostName",
        "HostCPU",
        "HostMem",
        "HostDisk",
        "HostNet",
    )

models.py

from django.db import models

class HostInfo(models.Model):
    # One row per monitored host; every metric is stored as short text.
    id = models.AutoField(primary_key=True)
    # Host address
    HostAddr = models.CharField(verbose_name="主机地址", max_length=64)
    # Host name
    HostName = models.CharField(verbose_name="主机名称", max_length=64)
    # CPU utilisation
    HostCPU = models.CharField(verbose_name="CPU利用率", max_length=64)
    # Memory figures
    HostMem = models.CharField(verbose_name="内存数据", max_length=64)
    # Free disk space
    HostDisk = models.CharField(verbose_name="磁盘空闲", max_length=64)
    # NIC traffic
    HostNet = models.CharField(verbose_name="网卡流量", max_length=64)

img

先来判断是否有指定的IP地址,有的话后端直接爬取数据并填充到数据库中,要求是只需要输入IP地址即可取出所有的数据,并自动填充,有几台自动填充几台。

if __name__ == "__main__":
    # Imported here because the snippet uses sqlite3 without importing it.
    import sqlite3

    # Read every stored host row, then release the database before the
    # (slow) per-host agent queries start.
    conn = sqlite3.connect("C:/Users/LyShark/PycharmProjects/MyProject/db.sqlite3")
    try:
        cursor = conn.cursor()
        cursor.execute('select * from MyWeb_hostinfo;')
        data = cursor.fetchall()
    finally:
        # The original never closed the connection.
        conn.close()

    for row in data:
        # Column 0 is the auto id; column 1 is HostAddr in the HostInfo model.
        addr = row[1]
        info = GetSysInfo(addr,"10050")
        print(info.GetNetInfo())

fabric的使用技巧:fabric工具也是自动化运维利器,它是在paramiko基础上的二次封装。

# Run a single command on a remote host
from fabric import Connection
# The port is given as an int, as Fabric documents it.
conn = Connection(host="192.168.1.10",user="root",port=22,connect_kwargs={"password":"123"})
try:
    with conn.cd("/var/www/html/"):
        ret = conn.run("ls -lh",hide=True)
        # format() works whether conn.port is an int or a str; "+" concatenation
        # raises TypeError for an int port.
        print("主机:{}端口:{}完成".format(conn.host, conn.port))
except Exception:
    print("主机:{}端口:{}失败".format(conn.host, conn.port))
finally:
    # Release the SSH session; the original left it open.
    conn.close()

# Read a value from the remote host
from fabric import Connection
# The port is given as an int, as Fabric documents it.
conn = Connection(host="192.168.1.20",user="root",port=22,connect_kwargs={"password":"123"})
try:
    uname = conn.run('uname -s', hide=True)
    if 'Linux' in uname.stdout:
        # Last line of `df -h /`, fifth column.
        command = "df -h / | tail -n1 | awk '{print $5}'"
        print(conn.run(command,hide=True).stdout.strip())
finally:
    # Release the SSH session; the original left it open.
    conn.close()

# Upload and download a file
from fabric import Connection
# The port is given as an int, as Fabric documents it.
conn = Connection(host="192.168.1.20",user="root",port=22,connect_kwargs={"password":"123"})
try:
    conn.put("D://zabbix_get.exe","/tmp/zabbix.exe")  # upload: local path -> remote path
    conn.get("/tmp/zabbix.exe","./zab.exe")           # download: remote path -> local path
finally:
    # Release the SSH session; the original left it open.
    conn.close()