Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/296.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
python脚本在发出API请求时经常中断_Python_Api_Logging_Fork_Check Mk - Fatal编程技术网

python脚本在发出API请求时经常中断

python脚本在发出API请求时经常中断,python,api,logging,fork,check-mk,Python,Api,Logging,Fork,Check Mk,我有一个python脚本,它作为Ganeti钩子运行,在添加、删除、关闭和启动Ganeti实例之后运行。当向Ganeti添加新实例时,钩子应该添加此实例,以通过API调用检查_mk。删除Ganeti中的实例会触发在check_mk中删除实例。关闭实例会在check_mk中设置时间,启动实例会在check_mk中删除由钩子设置的时间。我们在多个位置(数据中心)有Ganeti集群 我们使用check mk raw进行分布式监控,每个数据中心运行一个主设备和多个从设备。因此,添加、删除等。。只能通过对

我有一个python脚本,它作为Ganeti钩子运行,在添加、删除、关闭和启动Ganeti实例之后执行。当向Ganeti添加新实例时,钩子应该通过API调用把该实例添加到check_mk。删除Ganeti中的实例会触发在check_mk中删除该实例。关闭实例会在check_mk中设置停机时间(downtime),启动实例会在check_mk中移除由钩子设置的停机时间。我们在多个位置(数据中心)有Ganeti集群

我们使用check_mk raw进行分布式监控,每个数据中心运行一个主节点(master)和多个从节点(slave)。因此,添加、删除等操作只能通过对主节点(master)的API调用来完成

Ganeti钩子的stdout和stderr被重定向到文件,这在Ganeti中是硬编码的。只有当脚本失败时,错误才会写入标准输出(控制台);如果脚本成功运行,输出将被重定向到文件,而且大多数情况下内容不多。因此,
print()
没有帮助。因此,我使用日志库

主要问题是脚本经常中断,有时甚至没有任何日志记录。我不知道这是我的编码能力问题还是网络延迟造成的。我今天添加了所有的异常处理,以便了解发生了什么,但这没有帮助

如果能在这方面得到任何帮助,我将不胜感激。下面是完整的脚本

非常感谢

编辑:我删除了大部分异常,因为它们并不真正相关,并修复了脚本中的一些打字错误

#!/usr/bin/env python

"""Manage host in monitoring."""

import os
import re
import sys
import json
import socket
import logging
import requests


APIURL = 'https://checkmk.host/site/check_mk/webapi.py'
WEBURL = 'https://checkmk.host/site/check_mk/view.py'


def hook_mon_token():
    """Read the monitoring credentials from ``/root/.hook_mon_token``.

    The file is written by the ganeti puppet module.  Its first line holds
    the LDAP secret and its second line the monitoring token.

    Returns:
        A ``(ldap_secret, mon_token)`` tuple.  Both strings are returned
        exactly as read, i.e. still carrying their trailing newline.
    """
    with open('/root/.hook_mon_token', 'r') as token_file:
        first_line = token_file.readline()
        second_line = token_file.readline()
    return first_line, second_line


def get_datacenter():
    """Return the lower-cased name of the datacenter this node runs in.

    The name is taken from the first entry of the datacenter list served
    on ``localhost:8500``.  When that entry contains a dash, only the part
    between the first and the second dash is used.
    """
    names = requests.get('http://localhost:8500/v1/catalog/datacenters').json()
    first = names[0]
    if '-' not in first:
        return first.lower()
    return first.split('-')[1].lower()


def get_tenant(datacenter):
    """Return the tenant name of the instance's first network.

    Every ``tenant:<name>`` tag found in the environment variable
    ``GANETI_INSTANCE_NIC0_NETWORK_TAGS`` is collected; the names are
    concatenated and lower-cased.  When no such tag exists, *datacenter*
    is returned unchanged.
    """
    network_tags = os.environ['GANETI_INSTANCE_NIC0_NETWORK_TAGS']
    matches = re.findall(r'tenant:([\w-]+)', network_tags)
    name = ''.join(matches).lower()
    return name if name != '' else datacenter


def checkmk_api_call(action, timeout=30):
    """Add or delete the current Ganeti instance in check_mk via the Web API.

    The instance is described by the environment variables
    ``GANETI_INSTANCE_NAME`` and ``GANETI_INSTANCE_NIC0_IP`` (plus the
    network tags read by ``get_tenant`` for the datacenters dc1-dc3).

    Args:
        action: ``'add_host'`` or ``'delete_host'``.
        timeout: Seconds ``requests`` waits for the check_mk server before
            giving up, so that an unreachable server cannot block the hook
            forever.

    Returns:
        An ``(error, hostname)`` tuple.  ``error`` is ``False`` on success.
        Otherwise it is the error text returned by the API, the exception
        raised while talking to it, a message describing an unusable
        answer or, for ``'delete_host'``, whatever ``get_host`` reported
        when that did not equal the instance IP.
    """
    ldap_secret, mon_token = hook_mon_token()
    auth = ('check_mk_user', ldap_secret.strip())
    datacenter = get_datacenter()
    instance_name = os.environ['GANETI_INSTANCE_NAME'].split('.', 1)[0]
    server_ip = os.environ['GANETI_INSTANCE_NIC0_IP']
    params = {'action': action, '_username': 'automation', '_secret': mon_token.strip()}

    if action == 'add_host':
        attributes = {
            'ipaddress': server_ip,
            'site': datacenter,
            'tag_' + datacenter: datacenter,
            'tag_' + datacenter + '-vm': datacenter + '-vm',
            'tag_agent': 'cmk-agent',
            'tag_snmp': 'no-snmp'
        }
        if datacenter in ('dc1', 'dc2', 'dc3'):
            # These datacenters keep one folder (and one extra tag) per tenant.
            tenant = get_tenant(datacenter)
            hostname = tenant.upper() + '.' + instance_name
            folder = datacenter + "/" + tenant + "/hosts"
            attributes['tag_' + tenant + '-vm'] = tenant + '-vm'
        else:
            hostname = datacenter.upper() + '.' + instance_name
            folder = datacenter + "/hosts"
        request = {
            'hostname': hostname,
            'folder': folder,
            'attributes': attributes,
            'create_folders': '0'
        }

    elif action == 'delete_host':
        api_answer, request, hostname = get_host('delete_host')
        # Only delete the host when check_mk knows it under the IP of this
        # instance; anything else (other IP, API error) is reported back.
        if api_answer != server_ip:
            return api_answer, hostname

    else:
        # Without this branch `request` and `hostname` would be unbound below.
        return 'Unsupported check_mk API action "%s"' % action, instance_name

    try:
        resp_post = requests.post(APIURL, params=params, auth=auth,
                                  data={'request': json.dumps(request)},
                                  timeout=timeout)
        answer = json.loads(resp_post.content)
        if answer['result_code']:
            return answer['result'], hostname
        return False, hostname
    except requests.exceptions.RequestException as error:
        return error, hostname
    except ValueError as error:
        # The body was not JSON (e.g. an HTML error or login page).
        return 'Invalid answer from check_mk API: %s' % error, hostname
    except KeyError as error:
        return 'Unexpected answer from check_mk API, missing key %s' % error, hostname


def get_host(action, timeout=30):
    """Look up the current Ganeti instance in check_mk (``get_host`` API call).

    Args:
        action: The operation the lookup is made for.  ``"delete_host"``
            selects the detailed return value, any other value the plain
            one (see below).
        timeout: Seconds ``requests`` waits for the check_mk server before
            giving up.

    Returns:
        For ``action == "delete_host"`` a tuple
        ``(answer, request, hostname)`` where ``answer`` is the IP address
        stored for the host in check_mk, or the API error text / exception /
        message when the lookup failed; ``request`` is the request dict that
        was sent.

        For any other action a single value: ``False`` when the host
        exists, otherwise the API error text, exception or message.
    """
    ldap_secret, mon_token = hook_mon_token()
    auth = ('check_mk_user', ldap_secret.strip())
    datacenter = get_datacenter()
    instance_name = os.environ['GANETI_INSTANCE_NAME'].split('.', 1)[0]
    params = {'action': 'get_host', '_username': 'automation', '_secret': mon_token.strip()}

    if datacenter in ('dc1', 'dc2', 'dc3'):
        hostname = get_tenant(datacenter).upper() + '.' + instance_name
    else:
        hostname = datacenter.upper() + '.' + instance_name
    request = {'hostname': hostname}

    want_details = action == "delete_host"

    try:
        resp_post = requests.post(APIURL, params=params, auth=auth,
                                  data={'request': json.dumps(request)},
                                  timeout=timeout)
        answer = json.loads(resp_post.content)
        if answer['result_code']:
            outcome = answer['result']
        elif want_details:
            outcome = answer['result']['attributes']['ipaddress']
        else:
            outcome = False
    except requests.exceptions.RequestException as error:
        outcome = error
    except ValueError as error:
        # The body was not JSON (e.g. an HTML error or login page).
        outcome = 'Invalid answer from check_mk API: %s' % error
    except KeyError as error:
        # E.g. the host has no explicit 'ipaddress' attribute.
        outcome = 'Unexpected answer from check_mk API, missing key %s' % error

    if want_details:
        return outcome, request, hostname
    return outcome


def is_down(timeout=30):
    """Check whether the instance's host has a downtime set in check_mk.

    The host is first looked up with ``get_host``; then the ``downtimes``
    view is queried for it.

    Args:
        timeout: Seconds ``requests`` waits for the check_mk server before
            giving up.

    Returns:
        A tuple ``(host_is_down, down_comment, hostname)``:

        * ``(False, '', hostname)`` when the view holds a single row only
          (presumably just the header row -- TODO confirm against the
          check_mk JSON output format),
        * ``(True, comment, hostname)`` with the ``downtime_comment`` value
          of the first data row otherwise,
        * ``(None, error, hostname)`` when the lookup or the query failed.
    """
    ldap_secret, mon_token = hook_mon_token()
    auth = ('check_mk_user', ldap_secret.strip())
    datacenter = get_datacenter()
    instance_name = os.environ['GANETI_INSTANCE_NAME'].split('.', 1)[0]

    if datacenter in ('dc1', 'dc2', 'dc3'):
        hostname = get_tenant(datacenter).upper() + '.' + instance_name
    else:
        hostname = datacenter.upper() + '.' + instance_name

    # NOTE(review): 'host_regex' receives the plain hostname; if check_mk
    # treats it as an unanchored regex it may also match similarly named
    # hosts -- confirm.
    params = {
        '_username': 'automation',
        '_secret': mon_token.strip(),
        'output_format': 'JSON',
        'host_regex': hostname,
        'view_name': 'downtimes'
    }

    apierror = get_host('get_host')
    if apierror:
        return None, apierror, hostname

    try:
        resp_get = requests.get(WEBURL, params=params, auth=auth, timeout=timeout).text
        resp_json = json.loads(resp_get)
        if len(resp_json) == 1:
            return False, '', hostname
        # Row 0 lists the column names, row 1 is the first downtime entry.
        down_comment = resp_json[1][resp_json[0].index('downtime_comment')]
        return True, down_comment, hostname
    except (requests.exceptions.RequestException, ValueError, IndexError) as error:
        # ValueError: body is not JSON or the column is missing;
        # IndexError: the answer holds no rows at all.
        return None, error, hostname


def checkmk_web_call(action, timeout=30):
    """Set or remove a downtime for the instance's host via ``view.py``.

    Args:
        action: ``'stop'`` posts the downtime form of the ``host`` view
            with the comment ``'down by ganeti shutdown'``; ``'start'``
            posts the "remove downtimes" form of the ``downtimes`` view.
            Any other value posts the common parameters only.
        timeout: Seconds ``requests`` waits for the check_mk server before
            giving up.

    Returns:
        ``False`` on success, otherwise the error reported by ``get_host``
        or the ``requests`` exception (including HTTP error statuses).
    """
    ldap_secret, mon_token = hook_mon_token()
    auth = ('check_mk_user', ldap_secret.strip())
    datacenter = get_datacenter()
    instance_name = os.environ['GANETI_INSTANCE_NAME'].split('.', 1)[0]

    # Nothing to do when the host is unknown to check_mk.
    apierror = get_host('get_host')
    if apierror:
        return apierror

    if datacenter in ('dc1', 'dc2', 'dc3'):
        hostname = get_tenant(datacenter).upper() + '.' + instance_name
    else:
        hostname = datacenter.upper() + '.' + instance_name

    params = {
        '_do_confirm': 'yes',
        '_do_actions': 'yes',
        '_transid': '-1',
        '_username': 'automation',
        '_secret': mon_token.strip(),
        'output_format': 'JSON'
    }

    if action == 'stop':
        params.update({
            'view_name': 'host',
            'host': hostname,
            '_on_hosts': 'on',
            '_downrange__next_year': 'This+year',
            '_down_comment': 'down by ganeti shutdown'
        })
    elif action == 'start':
        params.update({
            'view_name': 'downtimes',
            'host_regex': hostname,
            '_remove_downtimes': 'Remove'
        })

    try:
        resp = requests.post(WEBURL, params=params, auth=auth, timeout=timeout)
        # Report 4xx/5xx answers instead of silently treating them as success.
        resp.raise_for_status()
        return False
    except requests.exceptions.RequestException as error:
        return error


def _abort_hook(logger, message, *args):
    """Log *message* as an error, echo the last log line and exit with status 1."""
    logger.error(message, *args)
    # Print the line that was just written to the log file on stdout.
    os.system('tail -1 /tmp/monitoring_hook.log')
    sys.exit(1)


def gnt_action(action):
    """Mirror a Ganeti instance event into check_mk.

    Args:
        action: The Ganeti hooks path.  ``'instance-add'`` adds the host,
            ``'instance-remove'`` deletes it, ``'instance-stop'`` sets a
            downtime and ``'instance-start'`` removes a downtime that was
            set by this hook.  Other values are ignored.

    Raises:
        SystemExit: with status 1 when the check_mk call failed; the
            failure is written to the log beforehand.
    """
    logger = logging.getLogger(__name__)

    if action == 'instance-add':
        apierror, hostname = checkmk_api_call('add_host')
        if apierror:
            # The error must be a format *argument*: passing it as the
            # message makes logging fail and nothing gets written.
            _abort_hook(logger, '%s - Could not add "%s" to check_mk! Please add it manually!',
                        apierror, hostname)
        logger.info('Added "%s" successfully to check_mk. Please activete changes in WATO', hostname)
    elif action == 'instance-remove':
        apierror, hostname = checkmk_api_call('delete_host')
        if apierror:
            _abort_hook(logger, '%s - Could not remove "%s" from check_mk! Please remove it manually!',
                        apierror, hostname)
        logger.info('Removed "%s" successfully from check_mk. Please activate changes in WATO', hostname)
    elif action == 'instance-start':
        host_is_down, down_comment, hostname = is_down()
        if host_is_down is None:
            # is_down() hands the error back in the comment slot.
            _abort_hook(logger, '%s', down_comment)
        elif host_is_down and down_comment == 'down by ganeti shutdown':
            # Only remove downtimes this hook has set itself.
            apierror = checkmk_web_call('start')
            if apierror:
                _abort_hook(logger, '%s', apierror)
            logger.info('Removed down time successfully for "%s" in check_mk', hostname)
        else:
            logger.info('Nothing to do')
    elif action == 'instance-stop':
        host_is_down, down_comment, hostname = is_down()
        if host_is_down is None:
            _abort_hook(logger, '%s', down_comment)
        elif host_is_down is False:
            apierror = checkmk_web_call('stop')
            if apierror:
                _abort_hook(logger, '%s', apierror)
            logger.info('Set down time successfully for "%s" in check_mk', hostname)
        else:
            logger.info('Nothing to do. "%s" is already down', hostname)


def main():
    """Run the Ganeti monitoring hook for the current event.

    Sets up logging to ``/tmp/monitoring_hook.log`` and, when this node is
    the Ganeti master (``GANETI_MASTER`` equals the local FQDN), passes the
    hooks path (``GANETI_HOOKS_PATH``) to ``gnt_action``.  Instances tagged
    with ``monitoring:no`` are skipped.

    Raises:
        SystemExit: with status 0 when the instance is tagged
            ``monitoring:no``.
    """
    logger = logging.getLogger(__name__)
    log_file_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(process)s - %(message)s')
    log_file_handler = logging.FileHandler('/tmp/monitoring_hook.log')
    log_file_handler.setFormatter(log_file_format)
    log_file_handler.setLevel(logging.DEBUG)
    logger.addHandler(log_file_handler)
    logger.setLevel(logging.INFO)

    # Only the master node talks to check_mk.
    if socket.getfqdn() != os.environ['GANETI_MASTER']:
        return

    action = os.environ['GANETI_HOOKS_PATH']
    # A missing tags variable is treated like an empty tag list.
    if 'monitoring:no' in os.environ.get('GANETI_POST_INSTANCE_TAGS', ''):
        logger.info('VM will not be added to check_mk')
        sys.exit(0)
    gnt_action(action)


if __name__ == "__main__":
    # Fork so that the process started by Ganeti returns immediately with
    # status 0; the child carries on and does the actual work in main().
    try:
        pid = os.fork()
        if pid > 0:
            # Exit parent process
            sys.exit(0)
    except OSError as e:
        # "except OSError, e" is a syntax error on Python 3; the "as" form
        # works on Python 2.6+ and 3 alike.
        print('fork failed: %d (%s)' % (e.errno, e.strerror))
        sys.exit(1)

    main()

经过更多的调试,我发现脚本只在一些数据中心失败,而在其他数据中心总是成功,很明显这是一个网络问题

API请求被发送到监控服务器的WAN IP,因此我只是将其替换为
/etc/hosts
中的LAN IP,直到找到根本原因

抱歉,这篇不相关的文章,因为脚本做了它应该做的事情