Python字符串格式化：'%' 是否比 'format' 函数更高效？_Python_Performance

Python字符串格式化：'%' 是否比 'format' 函数更高效？

python performance

Python字符串格式：为'；%'；比'更高效；格式'；功能？,python,performance,Python,Performance,我想通过比较不同的变量，在Python中构建一个字符串：使用+连接（称为“加号”）使用% 使用“”。加入（列表）使用格式功能使用“{0 在每个场景中，我都得出了以下结论串联似乎是最快的方法之一使用%格式化要比使用格式化功能格式化快得多我认为格式比%（例如in）好得多，%几乎被弃用因此，我有几个问题： %真的比格式快吗若然，原因为何为什么“{}{}.format（var1，var2）比“{0.attribute1}{0.attribute2}.format（object）

我想通过比较不同的变量，在Python中构建一个字符串：

使用
```
+
```
连接（称为“加号”）
使用
```
%
```
使用
```
"".join(list)
```
使用
```
format
```
功能

使用

"{0.<attribute>}".format(object)
在每个场景中，我都得出了以下结论

串联似乎是最快的方法之一
使用 %
进行格式化要比使用 format
函数进行格式化快得多

我认为格式
比%
（例如in）好得多，%
几乎被弃用
因此，我有几个问题：
%
真的比 format 快吗？

若然，原因为何
为什么 "{}{}".format(var1, var2)
比 "{0.attribute1}{0.attribute2}".format(object)
更高效？

作为参考，我使用以下代码来度量不同的计时
import time
def timing(f, n, show, *args):
    """Time ``n`` calls of ``f(*args)`` and return the elapsed seconds.

    Calls are issued in unrolled batches of ten so that loop overhead stays
    small relative to the work being measured; any remainder (``n % 10``)
    is run one call at a time so that exactly ``n`` calls are made.

    Args:
        f: Callable to benchmark.
        n: Total number of calls to make (an integer).
        show: When true, write ``f.__name__`` and the timing to stdout.
        *args: Positional arguments forwarded to every call of ``f``.

    Returns:
        The elapsed time in seconds, rounded to three decimals.
    """
    import sys

    # time.clock() was removed in Python 3.8; use perf_counter where it
    # exists and fall back to clock() on interpreters that lack it.
    clock = getattr(time, "perf_counter", None) or time.clock

    if show:
        sys.stdout.write(f.__name__ + ":\t")

    # Integer split works identically on Python 2 and 3 (plain ``/`` would
    # yield a float on Python 3 and break range()).
    batches, leftover = divmod(n, 10)
    batch_range = range(batches)
    leftover_range = range(leftover)

    t1 = clock()
    for _ in batch_range:
        f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args)
    for _ in leftover_range:
        f(*args)
    t2 = clock()

    elapsed = round(t2 - t1, 3)
    if show:
        sys.stdout.write(str(elapsed) + "\n")
    return elapsed
    

class values(object):
    """Simple holder exposing four attributes: ``a``, ``b``, ``c`` and ``d``.

    ``c`` and ``d`` are optional and default to the empty string.
    """

    def __init__(self, a, b, c="", d=""):
        # Store all four fields in one tuple assignment.
        self.a, self.b, self.c, self.d = a, b, c, d

    
def test_plus(a, b):
    """Benchmark case: join ``a`` and ``b`` with "-" using the ``+`` operator."""
    return a + "-" + b

def test_percent(a, b):
    """Benchmark case: join ``a`` and ``b`` with "-" using ``%`` formatting."""
    return "%s-%s" % (a, b)

def test_join(a, b):
    """Benchmark case: join ``a`` and ``b`` with "-" using ``''.join`` on a list."""
    return ''.join([a, '-', b])
        
def test_format(a, b):
    """Benchmark case: join ``a`` and ``b`` with "-" using ``str.format`` with auto-numbered fields."""
    return "{}-{}".format(a, b)

def test_formatC(val):
    """Benchmark case: join ``val.a`` and ``val.b`` with "-" using attribute lookups inside the ``str.format`` template."""
    return "{0.a}-{0.b}".format(val)

    
def test_plus_long(a, b, c, d):
    """Benchmark case: join four values with "-" using the ``+`` operator."""
    return a + "-" + b + "-" + c + "-" + d

def test_percent_long(a, b, c, d):
    """Benchmark case: join four values with "-" using ``%`` formatting."""
    return "%s-%s-%s-%s" % (a, b, c, d)
        
def test_join_long(a, b, c, d):
    """Benchmark case: join four values with "-" using ``''.join`` on a list."""
    return ''.join([a, '-', b, '-', c, '-', d])
    
def test_format_long(a, b, c, d):
    """Benchmark case: join four values with "-" using ``str.format`` with explicit positional indices."""
    return "{0}-{1}-{2}-{3}".format(a, b, c, d)

def test_formatC_long(val):
    """Benchmark case: join ``val.a`` .. ``val.d`` with "-" using attribute lookups inside the ``str.format`` template."""
    return "{0.a}-{0.b}-{0.c}-{0.d}".format(val)

    
def test_plus_long2(a, b, c, d):
    """Benchmark case: join the four values twice over (eight fields) with "-" using the ``+`` operator."""
    return a + "-" + b + "-" + c + "-" + d + "-" + a + "-" + b + "-" + c + "-" + d

def test_percent_long2(a, b, c, d):
    """Benchmark case: join the four values twice over (eight fields) with "-" using ``%`` formatting."""
    return "%s-%s-%s-%s-%s-%s-%s-%s" % (a, b, c, d, a, b, c, d)
    
def test_join_long2(a, b, c, d):
    """Benchmark case: join the four values twice over (eight fields) with "-" using ``''.join`` on a list."""
    return ''.join([a, '-', b, '-', c, '-', d, '-', a, '-', b, '-', c, '-', d])
            
def test_format_long2(a, b, c, d):
    """Benchmark case: eight "-"-separated fields via ``str.format``, reusing positional indices 0-3 twice."""
    return "{0}-{1}-{2}-{3}-{0}-{1}-{2}-{3}".format(a, b, c, d)

def test_formatC_long2(val):
    """Benchmark case: eight "-"-separated fields via ``str.format``, reading ``val.a`` .. ``val.d`` twice by attribute lookup."""
    return "{0.a}-{0.b}-{0.c}-{0.d}-{0.a}-{0.b}-{0.c}-{0.d}".format(val)


def test_plus_superlong(lst):
    """Benchmark case: concatenate ``str(i)`` for every item of ``lst`` using repeated ``+=``."""
    string = ""
    for i in lst:
        # Deliberately uses += in a loop: this is the technique being timed.
        string += str(i)
    return string
    

def test_join_superlong(lst):
    """Benchmark case: concatenate ``str(i)`` for every item of ``lst`` using ``"".join`` on a list comprehension."""
    return "".join([str(i) for i in lst])
    

def mean(numbers):
    """Return the arithmetic mean of ``numbers`` as a float.

    An empty sequence yields ``0.0`` because the divisor is clamped to 1.
    """
    total = float(sum(numbers))
    count = len(numbers)
    divisor = count if count > 1 else 1
    return total / divisor
        

# Benchmark driver (Python 2 syntax: xrange and the print statement).
# Number of calls per timing run; passed to timing() as its ``n`` argument.
nb_times = int(1e6)
# Each function is timed five times; the runs are averaged at the end.
n = xrange(5)
# Input for the *_superlong cases (those cases are currently disabled below).
lst_numbers = xrange(1000)
from collections import defaultdict
# Maps a function's __name__ to the list of timings collected for it.
metrics = defaultdict(list)
list_functions = [
    test_plus, test_percent, test_join, test_format, test_formatC,
    test_plus_long, test_percent_long, test_join_long, test_format_long, test_formatC_long,
    test_plus_long2, test_percent_long2, test_join_long2, test_format_long2, test_formatC_long2,
    # The superlong cases are left out of the run:
    # test_plus_superlong, test_join_superlong,
]
# Shared object for the formatC cases, which read its attributes a..d.
val = values("123", "456", "789", "0ab")
for i in n:
    for f in list_functions:
        # Progress marker, one dot per timed function.
        print ".",
        name = f.__name__
        # Pick the arguments from the function name. "formatC" is tested
        # first so that test_formatC_long* receive ``val`` rather than the
        # four strings used by the other *_long functions.
        if "formatC" in name:
            t = timing(f, nb_times, False, val)
        elif '_long' in name:
            t = timing(f, nb_times, False, "123", "456", "789", "0ab")
        elif '_superlong' in name:
            # Only reached if the superlong functions are re-enabled above.
            t = timing(f, nb_times, False, lst_numbers)
        else:
            t = timing(f, nb_times, False, "123", "456")
        metrics[name].append(t) 

# Get Average
print "\n===AVERAGE OF TIMINGS==="
for f in list_functions:
    name = f.__name__
    timings = metrics[name]
    # Right-align the name in 20 columns; show the mean with 5 decimals.
    print "{:>20}:\t{:0.5f}".format(name, mean(timings))

是的，%
字符串格式比.format
方法快
很可能（这可能有更好的解释），因为%
是一种语法符号（因此执行速度很快），而.format
至少涉及一个额外的方法调用
因为属性值访问还涉及一个额外的方法调用，即 __getattr__
我使用timeit
对各种格式化方法进行了稍微好一点的分析（在Python3.8.2上），结果如下-
我无法超越num_vars=250
，因为timeit
的最大参数（255）限制
tl;dr - Python 字符串格式化性能：f-strings
速度最快，也更优雅，但有时（由于某些限制，并且它仅适用于 Py3.6+），您可能必须根据需要使用其他格式化选项。
使用timeit
而不是您的自定义函数，第一次执行可能很慢，但后续函数执行更快，但实际上您只需调用该函数一次。@MaximilianPeters提到，对于get，您应该使用timeit
谢谢大家。我检查过代码< TimeTime/code >，但是那天我应该是高的，因为我相信它只支持Python 3。x，我主要使用2.7考虑从Python 3.6添加到你的分析。比较这些结果也很有趣。好代码！Python：在哪里可以建立字符串到U。se对构建字符串的各种方法进行计时测试…然后导入一个外部库，该库使用它自己的\uuuuu str\uuuuu
来构建一个自定义对象，该对象构建一个字符串（并且很可能使用在流程中构建字符串的字符串来构建该字符串）在所有计时测试的结果中。您是否知道或至少知道为什么f-string
和format
版本对于一个变量比两个变量花费的时间更多？1-vars 案例的设置代码已被破坏。它解析为 x0 = ('0',)
，这不会解包元组。x0, = ('0',)
是正确的。使用 setup_str = f'{vars}, = {vals_str}'
和 setup_int = f'{vars}, = {vals_int}'
（或将 ,
附加到 vars）来强制解包。@ExternalCompilerError 该问题是由于 MisterMiyagi 指出的不正确的设置造成的。我已经修复了它，现在的结果与预期的一样。
+-----------------+-------+-------+-------+-------+-------+--------+
| Type \ num_vars |   1   |   2   |   5   |  10   |  50   |  250   |
+-----------------+-------+-------+-------+-------+-------+--------+
|    f_str_str    | 0.056 | 0.063 | 0.115 | 0.173 | 0.754 | 3.717  |
+-----------------+-------+-------+-------+-------+-------+--------+
|    f_str_int    | 0.055 | 0.148 | 0.354 | 0.656 | 3.186 | 15.747 |
+-----------------+-------+-------+-------+-------+-------+--------+
|   concat_str    | 0.012 | 0.044 | 0.169 | 0.333 | 1.888 | 10.231 |
+-----------------+-------+-------+-------+-------+-------+--------+
|    pct_s_str    | 0.091 | 0.114 | 0.182 | 0.313 | 1.213 | 6.019  |
+-----------------+-------+-------+-------+-------+-------+--------+
|    pct_s_int    | 0.09  | 0.141 | 0.248 | 0.479 | 2.179 | 10.768 |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format_str  | 0.143 | 0.157 | 0.251 | 0.461 | 1.745 | 8.259  |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format_int  | 0.141 | 0.192 | 0.333 | 0.62  | 2.735 | 13.298 |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format2_str | 0.159 | 0.195 | 0.33  | 0.634 | 3.494 | 18.975 |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format2_int | 0.158 | 0.227 | 0.422 | 0.762 | 4.337 | 25.498 |
+-----------------+-------+-------+-------+-------+-------+--------+
from timeit import timeit
from beautifultable import BeautifulTable  # pip install beautifultable

# Benchmark of string-building techniques using timeit.
# For each variable count, source snippets are generated for every technique
# and timed against both string-valued and int-valued variables.

# Variable counts to benchmark, in the order the result columns are displayed.
NUM_VARS = (1, 2, 5, 10, 50, 250)

# Row order of the result table; each key is a technique/value-type pair.
ROW_KEYS = (
    'f_str_str', 'f_str_int', 'concat_str', 'pct_s_str', 'pct_s_int',
    'dot_format_str', 'dot_format_int', 'dot_format2_str', 'dot_format2_int',
)

# times[num_vars][row_key] -> seconds reported by timeit
times = {}

# Largest case first, matching the original run order.
for num_vars in reversed(NUM_VARS):
    names = [f'x{i}' for i in range(num_vars)]
    arg_list = ','.join(names)

    f_str = "f'{" + '}{'.join(names) + "}'"
    # "f'{x0}{x1}'"
    concat = '+'.join(names)
    # 'x0+x1'
    pct_s = '"' + '%s'*num_vars + '" % (' + arg_list + ')'
    # '"%s%s" % (x0,x1)'
    dot_format = '"' + '{}'*num_vars + '".format(' + arg_list + ')'
    # '"{}{}".format(x0,x1)'
    dot_format2 = '"{' + '}{'.join([str(i) for i in range(num_vars)]) + '}".format(' + arg_list + ')'
    # '"{0}{1}".format(x0,x1)'

    # The trailing comma after the names forces tuple unpacking for every
    # count, including 1: "x0, = ('0',)". Without it, the single-variable
    # string case was rendered as "x0 = 0" and silently timed an int.
    vals_str = tuple(map(str, range(num_vars)))
    setup_str = f'{arg_list}, = {vals_str}'
    # "x0,x1, = ('0', '1')"
    vals_int = tuple(range(num_vars))
    setup_int = f'{arg_list}, = {vals_int}'
    # 'x0,x1, = (0, 1)'

    times[num_vars] = {
        'f_str_str': timeit(f_str, setup_str),
        'f_str_int': timeit(f_str, setup_int),
        'concat_str': timeit(concat, setup_str),
        # 'concat_int' is intentionally absent: + on ints sums, not concatenates
        'pct_s_str': timeit(pct_s, setup_str),
        'pct_s_int': timeit(pct_s, setup_int),
        'dot_format_str': timeit(dot_format, setup_str),
        'dot_format_int': timeit(dot_format, setup_int),
        'dot_format2_str': timeit(dot_format2, setup_str),
        'dot_format2_int': timeit(dot_format2, setup_int),
    }

table = BeautifulTable()
# Headers and row values are both driven by NUM_VARS so that each column
# label sits over the matching measurements (times.keys() is in run order,
# i.e. 250 first, and would mislabel the columns).
table.column_headers = ['Type \\ num_vars'] + [str(count) for count in NUM_VARS]
for key in ROW_KEYS:
    table.append_row([key] + [times[count][key] for count in NUM_VARS])
print(table)