Python试图重构(消除)长控制流
我从SQL和SQL查询中获取了大量数据,这需要很长时间才能运行。由于SQL查询需要很长时间才能运行,因此我将以最细粒度的形式从数据库中获取数据。然后,我循环浏览这些数据一次,并将其聚合为对我有用的形式 我的问题是我一次又一次地重复我自己。但是,我不确定重构此控制流的最佳方法。提前谢谢Python试图重构(消除)长控制流,python,refactoring,dry,python-2.5,control-flow,Python,Refactoring,Dry,Python 2.5,Control Flow,我从SQL和SQL查询中获取了大量数据,这需要很长时间才能运行。由于SQL查询需要很长时间才能运行,因此我将以最细粒度的形式从数据库中获取数据。然后,我循环浏览这些数据一次,并将其聚合为对我有用的形式 我的问题是我一次又一次地重复我自己。但是,我不确定重构此控制流的最佳方法。提前谢谢 def processClickOutData(cls, raw_data): singles = {} total={} absolute_total = 0 channels =
def processClickOutData(cls, raw_data):
singles = {}
total={}
absolute_total = 0
channels = {}
singles_true = {}
total_true={}
channels_true = {}
absolute_total_true = 0
list_channels = set([])
list_tids = set([])
total_position = {}
total_position_true = {}
tid_position = {}
channel_position = {}
channel_position_true = {}
tid_position_true = {}
for row in raw_data:
gap=row[0]
count=row[1]
tid=row[2]
prefered=row[3]
channel=row[4]
position=row[5]
list_channels.add(channel)
list_tids.add(tid)
absolute_total += int(count)
if total.has_key(gap):
total[gap] += count
else:
total[gap] = count
if singles.has_key(gap) and singles[gap].has_key(tid):
singles[gap][tid] += count
elif singles.has_key(gap):
singles[gap][tid] = count
else:
singles[gap] = {}
singles[gap][tid] = count
if channels.has_key(gap) and channels[gap].has_key(channel):
channels[gap][channel] += count
elif channels.has_key(gap):
channels[gap][channel] = count
else:
channels[gap] = {}
channels[gap][channel] = count
if total_position.has_key(position):
total_position[position] += count
else:
total_position[position] = count
if tid_position.has_key(position) and tid_position[position].has_key(tid):
tid_position[position][tid] += count
elif tid_position.has_key(position):
tid_position[position][tid] = count
else:
tid_position[position] = {}
tid_position[position][tid] = count
if channel_position.has_key(position) and channel_position[position].has_key(channel):
channel_position[position][channel] += count
elif channel_position.has_key(position):
channel_position[position][channel] = count
else:
channel_position[position] = {}
channel_position[position][channel] = count
if prefered == 0:
absolute_total_true += count
if total_true.has_key(gap):
total_true[gap] += count
else:
total_true[gap] = count
if singles_true.has_key(gap) and singles_true[gap].has_key(tid):
singles_true[gap][tid] += count
elif singles_true.has_key(gap):
singles_true[gap][tid] = count
else:
singles_true[gap] = {}
singles_true[gap][tid] = count
if channels_true.has_key(gap) and channels_true[gap].has_key(channel):
channels_true[gap][channel] += count
elif channels_true.has_key(gap):
channels_true[gap][channel] = count
else:
channels_true[gap] = {}
channels_true[gap][channel] = count
if total_position_true.has_key(position):
total_position_true[position] += count
else:
total_position_true[position] = count
if tid_position_true.has_key(position) and tid_position_true[position].has_key(tid):
tid_position_true[position][tid] += count
elif tid_position_true.has_key(position):
tid_position_true[position][tid] = count
else:
tid_position_true[position] = {}
tid_position_true[position][tid] = count
if channel_position_true.has_key(position) and channel_position_true[position].has_key(channel):
channel_position_true[position][channel] += count
elif channel_position_true.has_key(position):
channel_position_true[position][channel] = count
else:
channel_position_true[position] = {}
channel_position_true[position][channel] = count
final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
"total_position" : total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
return final_values
你用来存储数据的整个结构可能是错误的,但因为我不知道你是如何使用它的,所以我不能帮你 通过使用,您可以摆脱所有
has\u key()
调用。注意thedict.has_key(key)
已被弃用,您只需使用key in the dict
看看我是如何更改for
循环的——您可以在for
语句中为名称赋值,无需单独执行
from collections import defaultdict
def processClickOutData(cls, raw_data):
absolute_total = 0
absolute_total_true = 0
list_channels = set()
list_tids = set()
total = defaultdict(int)
total_true = defaultdict(int)
total_position = defaultdict(int)
total_position_true = defaultdict(int)
def defaultdict_int():
return defaultdict(int)
singles = defaultdict(defaultdict_int)
singles_true = defaultdict(defaultdict_int)
channels = defaultdict(defaultdict_int)
channels_true = defaultdict(defaultdict_int)
tid_position = defaultdict(defaultdict_int)
tid_position_true = defaultdict(defaultdict_int)
channel_position = defaultdict(defaultdict_int)
channel_position_true = defaultdict(defaultdict_int)
for gap, count, prefered, channel, position in raw_data:
list_channels.add(channel)
list_tids.add(tid)
absolute_total += count
total[gap] += count
singles[gap][tid] += count
channels[gap][channel] += count
total_position[position] += count
tid_position[position][tid] += count
channel_position[position][channel] += count
if prefered == 0:
absolute_total_true += count
total_true[gap] += count
singles_true[gap][tid] += count
channels_true[gap][channel] += count
total_position_true[position] += count
tid_position_true[position][tid] += count
channel_position_true[position][channel] += count
final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
"total_position" : total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
return final_values
如果键不存在,则自动填充正确的默认值。这里有两种。在添加int
s的地方,如果不存在0
,则需要从它开始——这是int
返回的结果,因此defaultdict(int)
。在添加添加int
s的字典时,需要使用一个函数返回defaultdict(int)
,这就是defaultdict\u int
的作用
编辑:建议的备选词典结构:
position = defaultdict(lambda: defaultdict(defaultdict_int))
gap = defaultdict(lambda: defaultdict(defaultdict_int))
absolute_total = 0
for gap, count, prefered, channel, position in raw_data:
absolute_total += count
posd = position[position]
posd.setdefault('total', 0)
posd['total'] += count
posd['tid'][tid] += count
posd['channel'][channel] += count
gapd = gap[gap]
gapd.setdefault('total', 0)
gapd['total'] += count
gapd['tid'][tid] += count
gapd['channel'][channel] += count
对
\u true
版本也做同样的操作,您已经从12个dict
变为4个。您认为我用来存储数据的整个结构都错了是什么意思?输出是正确的,我正在对照执行相同功能的SQL查询检查数据。@Peter我在回答中添加了一个示例,说明了我的意思。基本上,有十二个这样的dict是相当混乱的,四个做完全相同的事情。谢谢!我对Python和编程都是新手,所以这非常有帮助。我投了赞成票,现在将接受。