Python试图重构(消除)长控制流

Python试图重构(消除)长控制流,python,refactoring,dry,python-2.5,control-flow,Python,Refactoring,Dry,Python 2.5,Control Flow,我从SQL和SQL查询中获取了大量数据,这需要很长时间才能运行。由于SQL查询需要很长时间才能运行,因此我将以最细粒度的形式从数据库中获取数据。然后,我循环浏览这些数据一次,并将其聚合为对我有用的形式 我的问题是我一次又一次地重复我自己。但是,我不确定重构此控制流的最佳方法。提前谢谢 def processClickOutData(cls, raw_data): singles = {} total={} absolute_total = 0 channels =

我从SQL和SQL查询中获取了大量数据,这需要很长时间才能运行。由于SQL查询需要很长时间才能运行,因此我将以最细粒度的形式从数据库中获取数据。然后,我循环浏览这些数据一次,并将其聚合为对我有用的形式

我的问题是我一次又一次地重复我自己。但是,我不确定重构此控制流的最佳方法。提前谢谢

def processClickOutData(cls, raw_data):
    singles = {}
    total={}
    absolute_total = 0
    channels = {}

    singles_true = {}
    total_true={}
    channels_true = {}
    absolute_total_true = 0

    list_channels = set([])
    list_tids = set([])


    total_position = {}
    total_position_true = {}
    tid_position = {}
    channel_position = {}
    channel_position_true = {}
    tid_position_true = {}

    for row in raw_data:
        gap=row[0]
        count=row[1]
        tid=row[2]
        prefered=row[3]
        channel=row[4]
        position=row[5]

        list_channels.add(channel)
        list_tids.add(tid)


        absolute_total += int(count)

        if total.has_key(gap):
            total[gap] += count
        else:
            total[gap] = count

        if singles.has_key(gap) and singles[gap].has_key(tid):
            singles[gap][tid] += count
        elif singles.has_key(gap):
            singles[gap][tid] = count
        else:
            singles[gap] = {}
            singles[gap][tid] = count

        if channels.has_key(gap) and channels[gap].has_key(channel):
            channels[gap][channel] += count
        elif channels.has_key(gap):
            channels[gap][channel] = count
        else:
            channels[gap] = {}
            channels[gap][channel] = count
        if total_position.has_key(position):
            total_position[position] += count
        else:
            total_position[position] = count
        if tid_position.has_key(position) and tid_position[position].has_key(tid):
            tid_position[position][tid] += count     
        elif tid_position.has_key(position):
            tid_position[position][tid] = count
        else:
            tid_position[position] = {}
            tid_position[position][tid] = count

        if channel_position.has_key(position) and channel_position[position].has_key(channel):
            channel_position[position][channel] += count     
        elif channel_position.has_key(position):
            channel_position[position][channel] = count
        else:
            channel_position[position] = {}
            channel_position[position][channel] = count

        if prefered == 0:
            absolute_total_true += count
            if total_true.has_key(gap):
                total_true[gap] += count
            else:
                total_true[gap] = count

            if singles_true.has_key(gap) and singles_true[gap].has_key(tid):
                singles_true[gap][tid] += count
            elif singles_true.has_key(gap):
                singles_true[gap][tid] = count
            else:
                singles_true[gap] = {}
                singles_true[gap][tid] = count

            if channels_true.has_key(gap) and channels_true[gap].has_key(channel):
               channels_true[gap][channel] += count
            elif channels_true.has_key(gap):
               channels_true[gap][channel] = count
            else:
               channels_true[gap] = {}
               channels_true[gap][channel] = count

            if total_position_true.has_key(position):
               total_position_true[position] += count
            else:
               total_position_true[position] = count 

            if tid_position_true.has_key(position) and tid_position_true[position].has_key(tid):
               tid_position_true[position][tid] += count     
            elif tid_position_true.has_key(position):
               tid_position_true[position][tid] = count
            else:
               tid_position_true[position] = {}
               tid_position_true[position][tid] = count

            if channel_position_true.has_key(position) and channel_position_true[position].has_key(channel):
               channel_position_true[position][channel] += count     
            elif channel_position_true.has_key(position):
               channel_position_true[position][channel] = count
            else:
               channel_position_true[position] = {}
               channel_position_true[position][channel] = count




    final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
                     "total_position" :  total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
    return final_values

你用来存储数据的整个结构可能是错误的,但因为我不知道你是如何使用它的,所以我不能帮你

通过使用,您可以摆脱所有
has\u key()
调用。注意
thedict.has_key(key)
已被弃用,您只需使用
key in the dict

看看我是如何更改
for
循环的——您可以在
for
语句中为名称赋值,无需单独执行

from collections import defaultdict

def processClickOutData(cls, raw_data):
    absolute_total = 0
    absolute_total_true = 0

    list_channels = set()
    list_tids = set()

    total = defaultdict(int)
    total_true = defaultdict(int)
    total_position = defaultdict(int)
    total_position_true = defaultdict(int)

    def defaultdict_int():
        return defaultdict(int)

    singles = defaultdict(defaultdict_int)
    singles_true = defaultdict(defaultdict_int)
    channels = defaultdict(defaultdict_int)
    channels_true = defaultdict(defaultdict_int)
    tid_position = defaultdict(defaultdict_int)
    tid_position_true = defaultdict(defaultdict_int)
    channel_position = defaultdict(defaultdict_int)
    channel_position_true = defaultdict(defaultdict_int)    

    for gap, count, prefered, channel, position in raw_data:
        list_channels.add(channel)
        list_tids.add(tid)

        absolute_total += count
        total[gap] += count
        singles[gap][tid] += count
        channels[gap][channel] += count
        total_position[position] += count
        tid_position[position][tid] += count
        channel_position[position][channel] += count

        if prefered == 0:
            absolute_total_true += count
            total_true[gap] += count
            singles_true[gap][tid] += count
            channels_true[gap][channel] += count
            total_position_true[position] += count
            tid_position_true[position][tid] += count
            channel_position_true[position][channel] += count




    final_values = {"singles" : singles, "singles_true" : singles_true, "total" : total, "total_true": total_true, "absolute_total": absolute_total, "absolute_total_true": absolute_total_true, "channel_totals" : channels, "list_channels" : list_channels, "list_tids" : list_tids, "channel_totals_true" : channels_true,
                     "total_position" :  total_position, "total_position_true" : total_position_true, "tid_position" : tid_position, "channel_position" : channel_position, "tid_position_true" : tid_position_true, "channel_position_true" : channel_position_true }
    return final_values
如果键不存在,则自动填充正确的默认值。这里有两种。在添加
int
s的地方,如果不存在
0
,则需要从它开始——这是
int
返回的结果,因此
defaultdict(int)
。在添加添加
int
s的字典时,需要使用一个函数返回
defaultdict(int)
,这就是
defaultdict\u int
的作用

编辑:建议的备选词典结构:

position = defaultdict(lambda: defaultdict(defaultdict_int))
gap = defaultdict(lambda: defaultdict(defaultdict_int))
absolute_total = 0

for gap, count, prefered, channel, position in raw_data:
    absolute_total += count

    posd = position[position]
    posd.setdefault('total', 0)
    posd['total'] += count
    posd['tid'][tid] += count
    posd['channel'][channel] += count

    gapd = gap[gap]
    gapd.setdefault('total', 0)
    gapd['total'] += count
    gapd['tid'][tid] += count
    gapd['channel'][channel] += count

\u true
版本也做同样的操作,您已经从12个
dict
变为4个。

您认为我用来存储数据的整个结构都错了是什么意思?输出是正确的,我正在对照执行相同功能的SQL查询检查数据。@Peter我在回答中添加了一个示例,说明了我的意思。基本上,有十二个这样的dict是相当混乱的,四个做完全相同的事情。谢谢!我对Python和编程都是新手,所以这非常有帮助。我投了赞成票,现在将接受。