Python 3.x Splunk Python SDK API job.results限制为50k个结果。试图设置一个偏移量以提取多个50k块,但我不知道如何让它工作
我有一份工作,他的工作['resultCount']是367k,但无论我做什么,我似乎都不能超过前50000块 我在这里从一个答案中读到了这段代码,这些答案是针对具有类似最终目标和设置的人的: 我写了下面的代码,对此我做了一些修改,但是我不能让offset=self.\u offset做任何事情,我不知道它应该做什么Python 3.x Splunk Python SDK API job.results限制为50k个结果。试图设置一个偏移量以提取多个50k块,但不';我不知道如何让它工作,python-3.x,python-2.7,api,splunk,splunk-sdk,Python 3.x,Python 2.7,Api,Splunk,Splunk Sdk,我有一份工作,他的工作['resultCount']是367k,但无论我做什么,我似乎都不能超过前50000块 我在这里从一个答案中读到了这段代码,这些答案是针对具有类似最终目标和设置的人的: 我写了下面的代码,对此我做了一些修改,但是我不能让offset=self.\u offset做任何事情,我不知道它应该做什么 class SplunkConnector(object): def __init__(self, username, password, customerGuid): s
class SplunkConnector(object):
    """Question's version: run a Splunk search and return its results as CSV text.

    NOTE(review): ``job.results(count=0)`` below is served at most 50,000 rows
    by the Splunk REST API — this is the truncation the author is asking about.
    Fetching in count/offset pages is required to go past 50k.
    """

    def __init__(self, username, password, customerGuid):
        """Open an authenticated Splunk session, retrying on HTTP errors."""
        self.username = username
        self.password = password
        self.customerGuid = customerGuid
        flag = True
        # Retry until connect() succeeds; loops forever on persistent failure.
        while flag:
            try:
                # host was redacted ('*****') in the original post.
                self.service = client.connect(host=*****, port=8089, username=self.username, password=self.password, scheme='https')
                flag = False
            except binding.HTTPError as e:
                json_log.debug(str(e))

    def search(self, query_dict):
        """Run one search described by ``query_dict`` and return [label, csv_text].

        Expected keys: 'search', 'label', 'headers', 'customer', 'customerGuid',
        and optionally 'earliest_time'/'latest_time'.
        """
        query = query_dict['search']
        label = query_dict['label']
        search_headers = query_dict['headers']
        customer = query_dict['customer']
        customerGuid = query_dict['customerGuid']
        try:
            earliest_time = query_dict['earliest_time']
            latest_time = query_dict['latest_time']
        except KeyError:
            # Default window: all of yesterday.
            earliest_time = '-1d@d'
            latest_time = '@d'
        json_log.debug('Starting %s customerGuid=%s' % (label, self.customerGuid))
        kwargs_normalsearch = {'exec_mode': 'normal', 'earliest_time': earliest_time, 'latest_time': latest_time, 'output_mode': 'csv'}
        # fillnull gives every column a '---' sentinel; it is stripped from the
        # output at the bottom of this method.
        job = self.service.jobs.create(query + ' | fillnull value="---"', **kwargs_normalsearch)
        # Poll the job every 2 s until the server reports isDone == "1".
        while True:
            try:
                while not job.is_ready():
                    pass
                stats = {"isDone": job["isDone"],
                         "label": label,
                         "customer": customer,
                         "customerGuid": customerGuid,
                         "doneProgress": float(job["doneProgress"]) * 100,
                         "scanCount": int(job["scanCount"]),
                         "eventCount": int(job["eventCount"]),
                         "resultCount": int(job["resultCount"])}
                json_log.debug(stats)
                if stats["isDone"] == "1":
                    json_log.debug("\n\nDone!\n\n")
                    break
                sleep(2)
                stats = {"isDone": job["isDone"],
                         "label": label,
                         "customer": customer,
                         "customerGuid": customerGuid,
                         "doneProgress": float(job["doneProgress"]) * 100}
                json_log.debug(stats)
                if stats["isDone"] == "1":
                    json_log.debug('Search %s finished for customerGuid=%s'
                                   % (label, customerGuid))
                    break
                sleep(2)
            except binding.HTTPError as e:
                # Transient polling error; keep waiting.
                json_log.debug(str(e))
                pass
            except AttributeError:
                # Job stats may not exist yet right after creation.
                stats = {"isDone": job["isDone"],
                         "label": label,
                         "customer": customer,
                         "customerGuid": customerGuid,
                         "doneProgress": float(job["doneProgress"]) * 100}
                json_log.debug(stats)
                if stats["isDone"] == "1":
                    json_log.debug('Search %s finished for customerGuid=%s'
                                   % (label, customerGuid))
                    break
                sleep(2)
        # Get the results and display them
        # NOTE(review): count=0 asks for "all" rows, but the REST endpoint
        # still caps a single response at 50,000 — the reported bug.
        result_count = job['resultCount']
        rs = job.results(count=0)
        rr = results.ResultsReader(io.BufferedReader(rs))
        results_list = []
        for result in rr:
            if isinstance(result, results.Message):
                # Diagnostic messages may be returned in the results
                json_log.debug('%s: %s label=%s customerGuid=%s'
                               % (result.type, result.message, label, customerGuid))
            elif isinstance(result, dict):
                # Normal events are returned as dicts
                keys, values = [], []
                # Backfill any requested header the event lacks.
                for header in search_headers:
                    if header not in result.keys():
                        print(header)
                        result[header] = ''
                for key, value in result.items():
                    if key in search_headers:
                        keys.append(str(key))
                        values.append(str(value))
                if not results_list == []:
                    results_list.append(values)
                else:
                    # First event: emit the header row before its values.
                    results_list.append(keys)
                    results_list.append(values)
        # NOTE(review): csv.writer over BytesIO works on Python 2 only; on
        # Python 3 the writer emits str and needs io.StringIO.
        output = io.BytesIO()
        writer = csv.writer(output, delimiter=',')
        writer.writerows(results_list)
        output_string = output.getvalue()
        assert rr.is_preview is False
        job.cancel()
        return [label, output_string.replace('\r\n', '\n').replace('---', '')]

    def searches(self, query_list):
        """Run each query in ``query_list`` concurrently; return {label: csv_text}."""
        print(query_list)
        if type(query_list) == dict:
            query_list = [value for value in query_list.values()]
        with closing(ThreadPool(processes=len(query_list))) as pool:
            results = pool.map(self.search, query_list)
            pool.terminate()
        print(results)
        search_results = {item[0]: item[1] for item in results}
        print(search_results)
        return search_results
我成功地使它工作了。我下面的代码应该演示如何实现这一点
import io
import csv
from time import sleep
import splunklib.results as results
import splunklib.client as client
import splunklib.binding as binding
from multiprocessing.pool import ThreadPool
from contextlib import closing
class SplunkConnector(object):
    """Run Splunk searches and return their results as CSV text.

    The Splunk REST API serves at most 50,000 rows per ``/results`` call, so
    ``search`` pulls the result set in 50k-row pages via ``results_getter``
    and stitches the pages together under a single header row.
    """

    def __init__(self, username, password, customerGuid):
        """Open an authenticated session, retrying on HTTP errors.

        NOTE(review): like the original, this retries forever if the
        failure is persistent (e.g. bad credentials).
        """
        self.username = username
        self.password = password
        self.customerGuid = customerGuid
        connected = False
        while not connected:
            try:
                # The hostname was redacted ('*****') in the original post;
                # quoting the placeholder keeps the module importable.
                self.service = client.connect(host='*****', port=8089,
                                              username=self.username,
                                              password=self.password,
                                              scheme='https')
                connected = True
            except binding.HTTPError as e:
                json_log.debug(str(e))

    def search(self, query_dict):
        """Run one search described by ``query_dict``.

        Expected keys: 'search', 'label', 'headers', 'customer',
        'customerGuid', and optionally 'earliest_time'/'latest_time'.
        Returns ``[label, csv_text]``.
        """
        query = query_dict['search']
        label = query_dict['label']
        search_headers = query_dict['headers']
        customer = query_dict['customer']
        customerGuid = query_dict['customerGuid']
        try:
            earliest_time = query_dict['earliest_time']
            latest_time = query_dict['latest_time']
        except KeyError:
            # Default window: all of yesterday.
            earliest_time = '-1d@d'
            latest_time = '@d'
        kwargs_normalsearch = {'exec_mode': 'normal',
                               'earliest_time': earliest_time,
                               'latest_time': latest_time,
                               'output_mode': 'csv'}
        # fillnull gives every column a '---' sentinel so the CSV stays
        # rectangular; the sentinel is stripped from the output below.
        job = None
        while job is None:
            try:
                job = self.service.jobs.create(
                    query + ' | fillnull value="---"', **kwargs_normalsearch)
            except binding.HTTPError:
                pass  # transient error; retry (loops forever if persistent)
        self._wait_until_done(job)
        result_count = int(job['resultCount'])
        page_size = 50000  # hard server-side cap per /results call
        # First page carries the CSV header row.
        results_list = self.results_getter(job, label, customerGuid,
                                           search_headers, True, page_size,
                                           0, result_count)
        # Iterating over fixed offsets guarantees termination; the original
        # looped on len(results_list) < result_count + 1 and could spin
        # forever if a page ever came back short.
        for offset in range(page_size, result_count, page_size):
            results_list.extend(
                self.results_getter(job, label, customerGuid, search_headers,
                                    False, page_size, offset, result_count))
        # csv.writer emits str on Python 3, so the buffer must be StringIO;
        # the original io.BytesIO raised TypeError on the first write.
        output = io.StringIO()
        writer = csv.writer(output, delimiter=',')
        writer.writerows(results_list)
        output_string = output.getvalue()
        job.cancel()
        return [label, output_string.replace('\r\n', '\n').replace('---', '')]

    def _wait_until_done(self, job):
        """Poll ``job`` every 2 s until the server reports isDone == "1"."""
        while True:
            try:
                while not job.is_ready():
                    pass
                if job["isDone"] == "1":
                    return
                sleep(2)
            except binding.HTTPError:
                pass  # transient error while polling; keep waiting
            except AttributeError:
                # Job stats can be briefly unavailable right after creation.
                if job["isDone"] == "1":
                    return
                sleep(2)

    def results_getter(self, job, label, customerGuid, search_headers, first,
                       count, offset, result_count):
        """Fetch one page of up to ``count`` rows starting at ``offset``.

        Returns a list of rows (lists of str). The header row is included
        only when ``first`` is true so that pages concatenate cleanly.
        """
        reader = results.ResultsReader(job.results(count=count, offset=offset))
        rows = []
        for result in reader:
            if isinstance(result, results.Message):
                # Diagnostic messages may be interleaved with results; skip
                # them. (The original 'if' branch had an empty body here,
                # which was a SyntaxError.)
                continue
            if isinstance(result, dict):
                # Emit columns in search_headers order so every page lines up
                # with the header row; missing fields become ''. (The original
                # used result.items() order, which could drift between pages.)
                if first and not rows:
                    rows.append([str(header) for header in search_headers])
                rows.append([str(result.get(header, ''))
                             for header in search_headers])
        assert not reader.is_preview  # a finished job must not yield previews
        return rows

    def searches(self, query_list):
        """Run each query in ``query_list`` concurrently.

        Returns ``{label: csv_text}``.
        """
        if isinstance(query_list, dict):
            query_list = list(query_list.values())
        with closing(ThreadPool(processes=len(query_list))) as pool:
            # Renamed from 'results' to avoid shadowing splunklib.results.
            search_outputs = pool.map(self.search, query_list)
            pool.terminate()
        return {item[0]: item[1] for item in search_outputs}
导入io
导入csv
从时间上导入睡眠
将splunklib.results导入为结果
将splunklib.client作为客户端导入
导入splunklib.binding作为绑定
从multiprocessing.pool导入线程池
从上下文库导入关闭
类SplunkConnector(对象):
定义初始化(self、用户名、密码、customerGuid):
self.username=用户名
self.password=密码
self.customerGuid=customerGuid
flag=True
而国旗:
尝试:
self.service=client.connect(主机=****,端口=8089,用户名=self.username,密码=self.password,scheme='https')
flag=False
除了binding.HTTPError作为e:
json_log.debug(str(e))
def搜索(自我、查询):
query=query\u dict['search']
label=查询记录['label']
搜索标题=查询标题['headers']
客户=查询记录[“客户”]
customerGuid=query\u dict['customerGuid']
尝试:
最早时间=查询记录[“最早时间”]
latest_time=查询记录['latest_time']
除KeyError外:
最早时间 = '-1d@d'
最近时间='@d'
kwargs_normalsearch={'exec_mode':'normal','earlime_time':earlime_time','latest_time':latest_time',output_mode':'csv'}
flag=True
而国旗:
尝试:
job=self.service.jobs.create(query+'| fillnull value=“--”,**kwargs_normalsearch)
flag=False
除了binding.HTTPError作为e:
通过
通过
尽管如此:
尝试:
而不是作业。准备好了吗()
通过
stats={“isDone”:作业[“isDone”],
“标签”:标签,
“客户”:客户,
“customerGuid”:customerGuid,
“doneProgress”:浮动(作业[“doneProgress”])*100,
“扫描计数”:int(作业[“扫描计数]),
“eventCount”:int(作业[“eventCount”]),
“resultCount”:int(作业[“resultCount”])}
如果统计数据[“isDone”]=“1”:
打破
睡眠(2)
stats={“isDone”:作业[“isDone”],
“标签”:标签,
“客户”:客户,
“customerGuid”:customerGuid,
“doneProgress”:浮动(作业[“doneProgress”])*100}
如果统计数据[“isDone”]=“1”:
打破
睡眠(2)
除了binding.HTTPError作为e:
通过
除属性错误外:
stats={“isDone”:作业[“isDone”],
“标签”:标签,
“客户”:客户,
“customerGuid”:customerGuid,
“doneProgress”:浮动(作业[“doneProgress”])*100}
如果统计数据[“isDone”]=“1”:
打破
睡眠(2)
结果计数=作业['resultCount']
偏移量=0
计数=50000
results\u list=self.results\u getter(作业、标签、customerGuid、搜索标题、True、计数、偏移量、结果计数)
而len(结果列表) < int(结果计数) + 1:(其余译文在原文中被截断)
import io
import csv
from time import sleep
import splunklib.results as results
import splunklib.client as client
import splunklib.binding as binding
from multiprocessing.pool import ThreadPool
from contextlib import closing
class SplunkConnector(object):
    """Run Splunk searches and return their results as CSV text.

    The Splunk REST API serves at most 50,000 rows per ``/results`` call, so
    ``search`` pulls the result set in 50k-row pages via ``results_getter``
    and stitches the pages together under a single header row.
    """

    def __init__(self, username, password, customerGuid):
        """Open an authenticated session, retrying on HTTP errors.

        NOTE(review): like the original, this retries forever if the
        failure is persistent (e.g. bad credentials).
        """
        self.username = username
        self.password = password
        self.customerGuid = customerGuid
        connected = False
        while not connected:
            try:
                # The hostname was redacted ('*****') in the original post;
                # quoting the placeholder keeps the module importable.
                self.service = client.connect(host='*****', port=8089,
                                              username=self.username,
                                              password=self.password,
                                              scheme='https')
                connected = True
            except binding.HTTPError as e:
                json_log.debug(str(e))

    def search(self, query_dict):
        """Run one search described by ``query_dict``.

        Expected keys: 'search', 'label', 'headers', 'customer',
        'customerGuid', and optionally 'earliest_time'/'latest_time'.
        Returns ``[label, csv_text]``.
        """
        query = query_dict['search']
        label = query_dict['label']
        search_headers = query_dict['headers']
        customer = query_dict['customer']
        customerGuid = query_dict['customerGuid']
        try:
            earliest_time = query_dict['earliest_time']
            latest_time = query_dict['latest_time']
        except KeyError:
            # Default window: all of yesterday.
            earliest_time = '-1d@d'
            latest_time = '@d'
        kwargs_normalsearch = {'exec_mode': 'normal',
                               'earliest_time': earliest_time,
                               'latest_time': latest_time,
                               'output_mode': 'csv'}
        # fillnull gives every column a '---' sentinel so the CSV stays
        # rectangular; the sentinel is stripped from the output below.
        job = None
        while job is None:
            try:
                job = self.service.jobs.create(
                    query + ' | fillnull value="---"', **kwargs_normalsearch)
            except binding.HTTPError:
                pass  # transient error; retry (loops forever if persistent)
        self._wait_until_done(job)
        result_count = int(job['resultCount'])
        page_size = 50000  # hard server-side cap per /results call
        # First page carries the CSV header row.
        results_list = self.results_getter(job, label, customerGuid,
                                           search_headers, True, page_size,
                                           0, result_count)
        # Iterating over fixed offsets guarantees termination; the original
        # looped on len(results_list) < result_count + 1 and could spin
        # forever if a page ever came back short.
        for offset in range(page_size, result_count, page_size):
            results_list.extend(
                self.results_getter(job, label, customerGuid, search_headers,
                                    False, page_size, offset, result_count))
        # csv.writer emits str on Python 3, so the buffer must be StringIO;
        # the original io.BytesIO raised TypeError on the first write.
        output = io.StringIO()
        writer = csv.writer(output, delimiter=',')
        writer.writerows(results_list)
        output_string = output.getvalue()
        job.cancel()
        return [label, output_string.replace('\r\n', '\n').replace('---', '')]

    def _wait_until_done(self, job):
        """Poll ``job`` every 2 s until the server reports isDone == "1"."""
        while True:
            try:
                while not job.is_ready():
                    pass
                if job["isDone"] == "1":
                    return
                sleep(2)
            except binding.HTTPError:
                pass  # transient error while polling; keep waiting
            except AttributeError:
                # Job stats can be briefly unavailable right after creation.
                if job["isDone"] == "1":
                    return
                sleep(2)

    def results_getter(self, job, label, customerGuid, search_headers, first,
                       count, offset, result_count):
        """Fetch one page of up to ``count`` rows starting at ``offset``.

        Returns a list of rows (lists of str). The header row is included
        only when ``first`` is true so that pages concatenate cleanly.
        """
        reader = results.ResultsReader(job.results(count=count, offset=offset))
        rows = []
        for result in reader:
            if isinstance(result, results.Message):
                # Diagnostic messages may be interleaved with results; skip
                # them. (The original 'if' branch had an empty body here,
                # which was a SyntaxError.)
                continue
            if isinstance(result, dict):
                # Emit columns in search_headers order so every page lines up
                # with the header row; missing fields become ''. (The original
                # used result.items() order, which could drift between pages.)
                if first and not rows:
                    rows.append([str(header) for header in search_headers])
                rows.append([str(result.get(header, ''))
                             for header in search_headers])
        assert not reader.is_preview  # a finished job must not yield previews
        return rows

    def searches(self, query_list):
        """Run each query in ``query_list`` concurrently.

        Returns ``{label: csv_text}``.
        """
        if isinstance(query_list, dict):
            query_list = list(query_list.values())
        with closing(ThreadPool(processes=len(query_list))) as pool:
            # Renamed from 'results' to avoid shadowing splunklib.results.
            search_outputs = pool.map(self.search, query_list)
            pool.terminate()
        return {item[0]: item[1] for item in search_outputs}