Python 最小化：利用每个参数计算时间的不对称性_Python_Scipy_Minimization_Scipy Optimize

Python 最小化：利用每个参数计算时间的不对称性

python

Python 最小化：利用每个参数计算时间的不对称性,python,scipy,minimization,scipy-optimize,Python,Scipy,Minimization,Scipy Optimize,我有一个（相对标准的）最小化问题，我有一组实验数据（xdata，ydata），一个模型y=f（x，参数），我想提取参数。这将是我的目标，但函数的结构中有一个最小化的技巧。这实际上是一个两步计算，其中一些参数的计算速度比其他参数快得多下面是带有实体模型函数和数据的示例代码（在实际情况中，param_a和param_b包括多个参数，如果有必要的话）。此处根本不使用模型的结构： '''Runs, but not optimal.''' import numpy as np import scipy.

我有一个（相对标准的）最小化问题：我有一组实验数据（xdata，ydata）和一个模型 y=f（x，参数），想从中拟合出参数。这类问题通常可以直接交给 scipy.optimize.curve_fit（下面的示例代码正是这样做的），但待最小化的函数在结构上有一个特点：它实际上是一个两步计算，其中与一部分参数相关的计算比与其余参数相关的计算快得多。

下面是使用模拟函数和模拟数据的示例代码（在实际情况中，

param_a

和

param_b

各自可以包含多个参数，如果这一点有影响的话）。这段代码完全没有利用模型的结构：

'''Runs, but not optimal.'''
import numpy as np
import scipy.optimize
from time import sleep

def quick(array_in, param):
    '''Cheap stage of the model: scale ``array_in`` by ``param``.'''
    scaled = param * array_in
    return scaled

def slow(array_in, param):
    '''Expensive stage of the model: ``exp(param * array_in)``.

    The fixed 0.1 s pause stands in for a costly computation.
    '''
    sleep(0.1)
    exponent = param * array_in
    return np.exp(exponent)

def model(x, param_a, param_b):
    '''Full model: the slow stage driven by ``param_a`` feeds the quick stage
    driven by ``param_b``.
    '''
    return quick(slow(x, param_a), param_b)

# Ground-truth (param_a, param_b) used to synthesize the noisy data set.
p_actual = [0.5, 2.0]
xdata = np.linspace(0.0, 10.0)
noiseless = model(xdata, *p_actual)
ydata = noiseless + np.random.randn(*xdata.shape)

# curve_fit sees model() as a black box, so the asymmetry between the two
# parameters is hidden from it and every evaluation pays for slow().
# Change the 0.1s sleep time in slow() to experiment.
initial_guess = np.asarray([1.0, 1.0])
popt, _ = scipy.optimize.curve_fit(model, xdata, ydata, p0=initial_guess)
print(popt)  # about [0.5, 2.0]

我可以想象最小化算法可以利用问题的结构，其中更改

param_b

非常容易（如果以前使用相同的

param_a

调用模型）。在实际情况中，

slow

涉及到求解ODE，需要几分钟，而

quick

是numpy数组的加权和，所需时间（至少）要少四个数量级

下面是这个想法的一个实现，并附带一个在非平凡拟合问题上比较两种方法的测试。结果表明，在大约50%的情况下，“改进”版本调用 slow()

*的次数反而更多*

（有时换来了收敛到更好的拟合，有时则没有）；这可能源于 scipy.optimize 例程与问题的能量景观之间的相互作用。我猜想，要得到真正有效的解决方案，需要比我目前所投入的更多的数学思考。

# -*- coding: utf-8 -*-
import numpy as np
from inspect import getfullargspec
import scipy.optimize
import random
from time import sleep
import matplotlib.pyplot as plt


def _number_of_arguments(f):
    '''
    Return the number of positional parameters that callable f accepts.

    f must be a callable taking a fixed number >= 1 of non-keyword arguments,
    because that's what scipy.optimize.curve_fit operates on. Bound methods are
    supported: their already-bound first parameter (e.g. `self`) is not
    counted, since callers do not pass it.

    Args:
        f (callable): the function to inspect.

    Returns:
        int: number of positional parameters of f.

    Raises:
        ValueError: if f accepts *args, **kwargs or keyword-only arguments,
            or if it takes no argument at all.
        TypeError: if f is not a callable whose signature can be inspected.
    '''
    import inspect  # local import: only this helper needs the Signature API

    try:
        # follow_wrapped=False: look at the callable itself, not at whatever
        # it wraps (same choice as inspect.getfullargspec).
        sig = inspect.signature(f, follow_wrapped=False)
    except ValueError as exc:
        # Keep the exception type getfullargspec used for such callables.
        raise TypeError('unsupported callable') from exc

    kinds = [param.kind for param in sig.parameters.values()]
    if inspect.Parameter.VAR_POSITIONAL in kinds:
        raise ValueError('Function accepts an arbitrary number of positional arguments.', f)
    if inspect.Parameter.VAR_KEYWORD in kinds or inspect.Parameter.KEYWORD_ONLY in kinds:
        raise ValueError('Function accepts keyword arguments.', f)
    # Only positional-only / positional-or-keyword parameters remain here.
    n = len(kinds)
    if n == 0:
        raise ValueError('Function takes no arguments, it should take at least one (x).', f)
    return n

def _score(ydata, yest, sigma=None):
    '''Sum of squared residuals between yest and ydata, each divided by sigma.

    When sigma is omitted, every point gets unit weight.
    '''
    if sigma is None:
        sigma = np.ones(yest.shape)
    weighted_residuals = (yest - ydata) / sigma
    return np.sum(weighted_residuals**2)

def _optimize_std(fslow, fquick, xdata, ydata, ps_guess=None, pq_guess=None, sigma=None):
    '''Reference fit: optimize ps and pq together in one curve_fit call.

    The composed model `fquick(fslow(x, *ps), *pq)` is handed to
    scipy.optimize.curve_fit as a single function of all parameters, so
    fslow() is called on every model evaluation. Missing initial guesses
    default to ones.

    Returns: : optimal parameters for fslow : optimal parameters for fquick
    '''
    # Each stage takes its input first, then its own fit parameters.
    n_slow = _number_of_arguments(fslow) - 1
    n_quick = _number_of_arguments(fquick) - 1
    assert ps_guess is None or n_slow == len(ps_guess)
    assert pq_guess is None or n_quick == len(pq_guess)

    assert xdata.shape == ydata.shape
    assert sigma is None or sigma.shape == ydata.shape

    def composed(x, *params):
        assert len(params) == n_slow + n_quick
        slow_params, quick_params = params[:n_slow], params[n_slow:]
        return fquick(fslow(x, *slow_params), *quick_params)

    start_slow = np.ones(n_slow) if ps_guess is None else np.array(ps_guess)
    start_quick = np.ones(n_quick) if pq_guess is None else np.array(pq_guess)
    start = np.concatenate((start_slow, start_quick))

    fit = scipy.optimize.curve_fit(composed, xdata, ydata, p0=start, sigma=sigma)
    best = fit[0]
    return best[:n_slow], best[n_slow:]

def optimize(fslow, fquick, xdata, ydata, ps_guess=None, pq_guess=None, sigma=None):
    '''Two-step curve fitting for a two-part function.

    Two functions `fslow: x, *ps -> interm` and `fquick: interm, *pq -> y`
    together define `f: x, *ps, *pq -> y = fquick(fslow(x, *ps), *pq)`.
    Assuming that `f: xdata -> ydata`, one can fit the ps, pq parameters; we
    return ps0, pq0 such that f(xdata, *ps0, *pq0) ~ ydata.

    This function is meant to produce an output equivalent to that of the
    standard scipy.optimize.curve_fit. Internally, however, only ps is handed
    to curve_fit: each time curve_fit evaluates the model, the intermediary
    calculation `interm = fslow(x, *ps)` is done once and reused for many
    calls to `y = fquick(interm, *pq)` while scipy.optimize.minimize fully
    optimizes pq. This greatly increases the number of calls to `fquick`, but
    the optimization that is actually costly in function calls is made with
    fewer parameters, with the aim of reducing the number of calls to fslow().

    The first argument of fslow() and the output of fquick() must be 1d arrays
    with consistent size. The output of fslow() must be consumed as the first
    argument of fquick(), but can be any kind of object.

    Args:
        fslow (callable): expensive stage, `fslow: x, *ps -> interm`.
        fquick (callable): cheap stage, `fquick: interm, *pq -> y`.
        xdata (np.array): evaluation points.
        ydata (np.array): values to fit; same shape as xdata.
        ps_guess (sequence, optional): initial ps; defaults to ones.
        pq_guess (sequence, optional): initial pq; defaults to ones.
        sigma (np.array, optional): per-point uncertainties, same shape as
            ydata. Used to weight both the outer fit over ps and the inner
            minimization over pq.

    Returns:
        tuple: (optimal parameters for fslow, optimal parameters for fquick).
    '''

    assert xdata.shape == ydata.shape
    assert sigma is None or sigma.shape == ydata.shape

    Nps = _number_of_arguments(fslow) - 1
    Npq = _number_of_arguments(fquick) - 1
    assert ps_guess is None or Nps == len(ps_guess)
    assert pq_guess is None or Npq == len(pq_guess)

    cur_ps = np.array(ps_guess, copy=True) if ps_guess is not None else np.ones(Nps)
    cur_pq = np.array(pq_guess, copy=True) if pq_guess is not None else np.ones(Npq)

    # State carried between the successive model evaluations made by
    # curve_fit: the pq found last (warm start for the next inner
    # minimization) and the ps it was found for.
    state = {'ps': None, 'pq': cur_pq}

    def f_with_quickopt(x, *ps):
        '''Model as seen by curve_fit: a function of ps only, with pq
        re-optimized for the given ps on every call.'''
        interm = fslow(x, *ps)

        def f_quick_score_to_minimize(pq):
            yest = fquick(interm, *pq)
            # Use the caller's sigma so that the inner and outer problems
            # minimize the same weighted residual.
            return _score(ydata, yest, sigma=sigma)

        solve_quick = scipy.optimize.minimize(
            f_quick_score_to_minimize, state['pq']
            )

        state['ps'] = np.array(ps, dtype=float)
        state['pq'] = solve_quick.x

        return fquick(interm, *solve_quick.x)

    # Main optimization call - here's what should take most time
    ps_opt, _ = scipy.optimize.curve_fit(f_with_quickopt, xdata, ydata, p0=cur_ps, sigma=sigma)

    # The last point evaluated by curve_fit is not necessarily the one it
    # returns (it may be a rejected trial step). If so, evaluate once more at
    # ps_opt so that the returned pq really belongs to the returned ps.
    if state['ps'] is None or not np.array_equal(state['ps'], ps_opt):
        f_with_quickopt(xdata, *ps_opt)

    pq_opt = state['pq']

    return ps_opt, pq_opt

if __name__ == '__main__':
    # Demo: fit one noisy data set with both strategies and plot the fits,
    # labelling each curve with the number of quick()/slow() calls it needed.

    def slow(x, a, b):
        '''Toy expensive stage; counts its own invocations in slow.Ncalls.'''
        slow.Ncalls += 1
        sleep(0.01)
        return a*x**2 + b * x

    def quick(x, a, b):
        '''Toy cheap stage; counts its own invocations in quick.Ncalls.'''
        quick.Ncalls += 1
        return a*x + b*np.cos(x)

    def model(x, ps, pq):
        '''Compose both stages from two separate parameter sequences.'''
        return quick(slow(x, *ps), *pq)

    slow.Ncalls = quick.Ncalls = 0

    # True parameters: two values per stage, each 1.0 plus a random offset.
    ps_actual = 1.0 + np.random.random((2, ))
    pq_actual = 1.0 + np.random.random((2, ))

    xdata = np.linspace(-0.0, 2.0, num=1000)
    y_theory = model(xdata, ps_actual, pq_actual)
    ydata = y_theory + np.random.randn(*xdata.shape)/5

    plt.figure()
    plt.plot(xdata, ydata, 'k.', label='data')
    plt.plot(xdata, y_theory, 'r-', label='actual')

    contenders = (
        ('std: {q} quick(), {s} slow()', _optimize_std),
        ('asym: {q} quick(), {s} slow()', optimize),
    )
    for template, fitter in contenders:
        # Count only the calls made by this fitter.
        slow.Ncalls = quick.Ncalls = 0

        ps, pq = fitter(slow, quick, xdata, ydata)

        # Build the label before model() below bumps the counters again.
        legend_text = template.format(q=quick.Ncalls, s=slow.Ncalls)
        plt.plot(xdata, model(xdata, ps, pq), label=legend_text)

    plt.legend()
    plt.show()

之前的

pq

最小化（可能使用了非常不同的

ps

）的值是否是您的最佳猜测？就保持接近原始算法而言，它是“最佳”的（在步骤N+1开始时，您对

pq

的最佳猜测是其在步骤N结束时的值，因为您同时对

pq

和

ps

进行操作）。我猜想它在以下意义上也是“最佳”的：（1）起点位于同一局部最小值的吸引域内；（2）起点离该局部最小值更近。不过我认为，在对待最小化的函数一无所知的情况下，这一点无法确定。直接方法则让优化驱动程序同时选择下一组

ps

和

pq

，并且通常在每一步两者都会改变。（这不一定更好，只是两种做法并不等价。）上一个

pq

最小化（可能使用了非常不同的

ps

）的值是您的最佳猜测吗？就保持接近原始算法而言，它是“最佳”的（在步骤N+1开始时，您对

pq

的最佳猜测是其在步骤N结束时的值，因为您同时对

pq

和

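ps

进行操作）。我猜想它在以下意义上也是“最佳”的：（1）起点位于同一局部最小值的吸引域内；（2）起点离该局部最小值更近。不过我认为，在对待最小化的函数一无所知的情况下，这一点无法确定。直接方法则让优化驱动程序同时选择下一组

ps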

和

pq

，并且通常在每一步两者都会改变。（这并不一定更好，只是两种做法并不等价。）