当前位置 主页 > 服务器问题 > win服务器问题汇总 >

    详解用Python实现自动化监控远程服务器

    栏目:win服务器问题汇总 时间:2019-10-10 08:14

    最近发现Python可以做很多事情,在监控服务器方面有其独特的优势:耗费资源少,开发周期短。

    首先我们做一个定时或者实时脚本timedtask.py,让其定时监控目标服务器,两种方式:

    第一种:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time  : 2017/11/27 15:59
    # @Desc  : 定时任务,以需要的时间间隔执行某个命令
    # @File  : timedtask.py
    # @Software: PyCharm
     
    import time, os
    from monitorserver import alltask
     
     
    def roll_back(cmd, inc=60):
      """Run the monitoring checks forever, pausing ``inc`` seconds per round.

      Args:
        cmd: accepted for call compatibility; this function never reads it.
        inc: number of seconds to sleep after each call to ``alltask``.

      This function never returns.
      """
      pause = inc
      while 1:
        # One monitoring round, then wait before starting the next one.
        alltask()
        time.sleep(pause)
     
    # Start the endless monitoring loop with a 5-second pause between rounds.
    # The first argument is not used by roll_back.
    roll_back("echo %time%", 5)
    

    第二种:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time  : 2017/11/27 15:59
    # @Desc  : 定时任务,以需要的时间间隔执行某个命令
    # @File  : timedtask.py
    # @Software: PyCharm
     
    import time, os
     
    def roll_back(cmd, inc=60):
      """Run the monitor script in a child process forever, every ``inc`` seconds.

      Args:
        cmd: accepted for call compatibility; this function never reads it.
        inc: number of seconds to sleep after each run of the script.

      This function never returns.
      """
      # Location of the monitoring script that is launched on every round.
      monitor_command = 'python /home/../monitorserver.py'
      while 1:
        os.system(monitor_command)
        time.sleep(inc)
     
    # Start the endless monitoring loop with a 5-second pause between rounds.
    # The first argument is not used by roll_back.
    roll_back("echo %time%", 5)
    

    做过监控的应该都知道,我们主要监控服务器的平均负载、磁盘、内存、CPU、网络接口(流量)和端口。针对这些指标,我做了以下远程监控;第一种和第二种方式使用的监控代码相同,monitorserver.py 的代码如下:

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    # @Time  : 2017/11/27 15:59
    # @Desc  : 服务器监控代码
    # @File  : monitorserver.py
    # @Software: PyCharm
     
    import pexpect
    import re
     
    import time
    import threading
     
    """
    主方法
    127.0.0.1#远程服务器ip地址
    """
    def ssh_command(user, host, password, command):
      """Run ``command`` on ``host`` through ssh and answer the password prompt.

      Spawns ``ssh -l <user> <host> <command>`` with pexpect, accepts the host
      key if ssh asks about an unknown host, and then sends ``password``.

      Args:
        user: remote login name.
        host: remote host name or IP address.
        password: text sent in reply to the ``password: `` prompt.
        command: command line appended to the ssh invocation.

      Returns:
        The pexpect child after the password has been sent; the caller reads
        the command output from it. None if the expected prompt did not show
        up before pexpect timed out (an error report is printed in that case).
      """
      ssh_new_key = 'Are you sure you want to continue connecting'
      child = pexpect.spawn('ssh -l %s %s %s' % (user, host, command))
      i = child.expect([pexpect.TIMEOUT, ssh_new_key, 'password: '])
      if i == 1:
        # Unknown host key: accept it, then wait for the password prompt
        # exactly once. Waiting a second time would find nothing left to match
        # and always end in a timeout.
        child.sendline('yes')
        i = child.expect([pexpect.TIMEOUT, 'password: '])
      if i == 0:
        # Index 0 is pexpect.TIMEOUT in both expect() lists above.
        print 'ERROR!'
        print 'SSH could not login. Here is what SSH said:'
        print child.before, child.after
        return None
      child.sendline(password)
      return child
     
     
    """
    内存监控
    """
    def mem_info():
     
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/meminfo")
      child.expect(pexpect.EOF)
      mem = child.before
      mem_values = re.findall("(\d+)\ kB", mem)
      MemTotal = mem_values[0]
      MemFree = mem_values[1]
      Buffers = mem_values[2]
      Cached = mem_values[3]
      SwapCached=mem_values[4]
      SwapTotal = mem_values[13]
      SwapFree = mem_values[14]
      print '******************************内存监控*********************************'
      print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
      print "总内存:",MemTotal
      print "空闲内存:", MemFree
      print "给文件的缓冲大小:",Buffers
      print "高速缓冲存储器使用的大小:", Cached
      print "被高速缓冲存储用的交换空间大小:", SwapCached
      print "给文件的缓冲大小:", Buffers
      if int(SwapTotal) == 0:
        print u"交换内存总共为:0"
      else:
        Rate_Swap = 100 - 100*int(SwapFree)/float(SwapTotal)
        print u"交换内存利用率:", Rate_Swap
      Free_Mem = int(MemFree) + int(Buffers) + int(Cached)
      Used_Mem = int(MemTotal) - Free_Mem
      Rate_Mem = 100*Used_Mem/float(MemTotal)
      print u"内存利用率:", str("%.2f" % Rate_Mem), "%"
     
     
    """
    内核线程、虚拟内存、磁盘、陷阱和 CPU 活动的统计信息
    """
    def vm_stat_info():
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "vmstat 1 2 | tail -n 1")
      child.expect(pexpect.EOF)
      vmstat_info = child.before.strip().split()
      processes_waiting = vmstat_info[0]
      processes_sleep = vmstat_info[1]
      swpd = vmstat_info[2]
      free = vmstat_info[3]
      buff = vmstat_info[4]
      cache = vmstat_info[5]
      si = vmstat_info[6]
      so = vmstat_info[7]
      io_bi = vmstat_info[8]
      io_bo = vmstat_info[9]
      system_interrupt = vmstat_info[10]
      system_context_switch = vmstat_info[11]
      cpu_user = vmstat_info[12]
      cpu_sys = vmstat_info[13]
      cpu_idle = vmstat_info[14]
      cpu_wait = vmstat_info[15]
      st=vmstat_info[16]
      print '****************************内核线程、虚拟内存、磁盘、陷阱和 CPU 活动的统计信息监控****************************'
      print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
      print "等待运行进程的数量:", processes_waiting
      print "处于不间断状态的进程:", processes_sleep
      print "使用虚拟内存(swap)的总量:", swpd
      print "空闲的内存总量:", free
      print "用作缓冲的内存总量:", buff
      print "用作缓存的内存总量:", cache
      print "交换出内存总量 :", si
      print "交换入内存总量 :", so
      print "从一个块设备接收:", io_bi
      print "发送到块设备:", io_bo
      print "每秒的中断数:", system_interrupt
      print "每秒的上下文切换数:", system_context_switch
      print "用户空间上进程运行的时间百分比:", cpu_user
      print "内核空间上进程运行的时间百分比:", cpu_sys
      print "闲置时间百分比:", cpu_idle
      print "等待IO的时间百分比:", cpu_wait
      print "从虚拟机偷取的时间百分比:", st
     
     
    '''
    cpu监控
    '''
    def cpu_info():
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/cpuinfo")
      child.expect(pexpect.EOF)
      cpuinfo = child.before
      cpu_num = re.findall('processor.*?(\d+)', cpuinfo)[-1]
      cpu_num = str(int(cpu_num) + 1)
      print '***************************************cpu监控***************************************'
      print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
      print u"CPU数目:", cpu_num
      li = cpuinfo.replace('\t', '').split('\r')
      CPUinfo = {}
      procinfo = {}
      nprocs = 0
      for line in li:
        if line.find("processor") > -1:
          CPUinfo['CPU%s' % nprocs] = procinfo
          nprocs = nprocs + 1
        else:
          if len(line.split(':')) == 2:
            procinfo[line.split(':')[0].strip()] = line.split(':')[1].strip()
          else:
            procinfo[line.split(':')[0].strip()] = ''
      for processor in CPUinfo.keys():
        print "CPU属于的名字及其编号、标称主频:",CPUinfo[processor]['model name']
        print "CPU属于其系列中的哪一代的代号:", CPUinfo[processor]['model']
        print "CPU制造商:", CPUinfo[processor]['vendor_id']
        print "CPU产品系列代号:", CPUinfo[processor]['cpu family']
        print "CPU的实际使用主频:", CPUinfo[processor]['cpu MHz']
     
     
    """
    平均负载
    """
    def load_stat():
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/loadavg")
      child.expect(pexpect.EOF)
      loadavgs = child.before.strip().split()
      print '************************负载均衡监控****************************'
      print "*******************时间:",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"******************"
      print "系统5分钟前的平均负载:", loadavgs[0]
      print "系统10分钟前的平均负载:", loadavgs[1]
      print "系统15分钟前的平均负载:", loadavgs[2]
      print "分子是正在运行的进程数,分母为总进程数:",loadavgs[3]
      print "最近运行的进程id:", loadavgs[4]
     
     
     
    """
    获取网络接口的输入和输出
    """
    def ionetwork():
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "cat /proc/net/dev")
      child.expect(pexpect.EOF)
      netdata = child.before
      li = netdata.strip().split('\n')
      print '************************获取网络接口的输入和输出监控****************************'
      print "*******************时间:",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"******************"
      net = {}
      for line in li[2:]:
        line = line.split(":")
        eth_name = line[0].strip()
        # if eth_name != 'lo':
        net_io = {}
        net_io['Receive'] = round(float(line[1].split()[0]) / (1024.0 * 1024.0), 2)
        net_io['Transmit'] = round(float(line[1].split()[8]) / (1024.0 * 1024.0), 2)
        net[eth_name] = net_io
      print net
     
     
    """ 
    磁盘空间监控
    """
    def disk_stat():
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "df -h")
      child.expect(pexpect.EOF)
      disk = child.before
      disklist = disk.strip().split('\n')
      disklists=[]
      for disk in disklist:
        disklists.append(disk.strip().split())
      print '************************磁盘空间监控****************************'
      print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
      for i in disklists[1:]:
        print "\t文件系统:", i[0],
        print "\t容量:", i[1],
        print "\t已用:", i[2],
        print "\t可用:", i[3],
        print "\t已用%挂载点:", i[4]
     
     
    """ 
    端口监控
    一般是远程服务器用户名用户
    """
    def getComStr():
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", "netstat -tpln")
      child.expect(pexpect.EOF)
      Com = child.before
      print '******************************端口监控*********************************'
      print "*******************时间:", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), "******************"
      print Com
     
     
     
    """
    CPU使用率监控(两次读取 /proc/stat 计算)
    """
    def cpu():
      child = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", 'cat /proc/stat | grep "cpu "')
      child.expect(pexpect.EOF)
      child1 = ssh_command("远程服务器用户名", "127.0.0.1", "远程服务器密码", 'cat /proc/stat | grep "cpu "')
      child1.expect(pexpect.EOF)
      cpus = child.before.strip().split()
      cpus1 = child1.before.strip().split()
      print '************************cpu使用情况****************************'
      print "*******************时间:",time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),"******************"
      T1=int(cpus[1])+int(cpus[2])+int(cpus[3])+int(cpus[4])+int(cpus[5])+int(cpus[6])+int(cpus[8])+int(cpus[9])
      T2=int(cpus1[1]) + int(cpus1[2]) + int(cpus1[3]) + int(cpus1[4] )+ int(cpus1[5] )+int( cpus1[6] )+ int(cpus1[8] )+ int(cpus1[9])
      Tol=T2-T1
      Idle=int(cpus1[4]) - int(cpus[4])
      print '总的cpu时间1:',T1
      print '总的cpu时间2:', T2
      print '时间间隔内的所有时间片:', Tol
      print '计算空闲时间idle:', Idle
      print "计算cpu使用率:",100*(Tol-Idle)/Tol,"%"
     
    """
    第一种执行
    """
    def alltask():
      try:
        threads = []
        t1 = threading.Thread(target=mem_info)
        threads.append(t1)
        t2 = threading.Thread(target=vm_stat_info)
        threads.append(t2)
        t3 = threading.Thread(target=cpu_info)
        threads.append(t3)
        t4 = threading.Thread(target=load_stat)
        threads.append(t4)
        t5 = threading.Thread(target=ionetwork)
        threads.append(t5)
        t6 = threading.Thread(target=disk_stat)
        threads.append(t6)
        t7 = threading.Thread(target=getComStr)
        threads.append(t7)
        t8 = threading.Thread(target=cpu)
        threads.append(t8)
        for n in range(len(threads)):
          threads[n].start()
      except Exception, e:
        print str(e)
     
    """
    第二种执行
    """
    if __name__ == '__main__':
      # Script entry point (the second way to run the monitor): start all
      # checks once. alltask() builds and starts the same eight threads and
      # prints any exception, so the thread setup is not repeated here.
      alltask()