Python 大二项式系数的故障计算

Python 大二项式系数的故障计算,python,python-2.7,plot,binomial-coefficients,Python,Python 2.7,Plot,Binomial Coefficients,简介。 import matplotlib.pyplot as plt import numpy as np %matplotlib inline #Returns the log of "n choose k" calculated with 2nd order Stirling's approximation def l_cb(n, k): if (k > n) or (n < 0) or (n < 0): print "Invalid value

简介。

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

#Returns the log of "n choose k" calculated with 2nd order Stirling's approximation
def l_cb(n, k):
    """Return the natural log of "n choose k" via Stirling's approximation.

    Each factorial is replaced by the second-order Stirling formula
    ``ln(m!) ~ m*ln(m) - m + 0.5*ln(2*pi*m) + ln(1 + 1/(12*m))``; the ``-m``
    terms cancel in the binomial coefficient, leaving the ``A``, ``B`` and
    ``-0.5*ln(2*pi)`` terms computed below.

    Parameters
    ----------
    n, k : scalar numbers, expected to satisfy ``0 <= k <= n``.

    Returns
    -------
    float
        Approximation of ``ln(C(n, k))``.  ``0.0`` is returned for the exact
        cases ``k == 0`` and ``k == n``.  For invalid arguments (``k > n``,
        ``n < 0`` or ``k < 0``) a diagnostic is printed and ``0.0`` is
        returned, as before.

    Notes
    -----
    The result is a float64 with roughly 16 significant digits, so for very
    large arguments differences between results lose absolute accuracy.
    """
    # The second test used to repeat ``n < 0``; it is meant to reject negative k.
    if (k > n) or (n < 0) or (k < 0):
        # One pre-formatted string prints identically with the Python 2 print
        # statement and the Python 3 print function.
        print("Invalid values for the binomial coefficient: n = %s , k = %s ." % (n, k))
        return 0.0
    # C(n, 0) == C(n, n) == 1, whose log is exactly 0 (this also covers n == 0).
    if (k == n) or (k == 0):
        return 0.0
    A = (n + 0.5) * np.log(n) - (k + 0.5) * np.log(k) - (n - k + 0.5) * np.log(n - k)
    # log1p keeps the tiny 1/(12*m) corrections that 1 + x would round away.
    B = np.log1p(1 / (12.0 * n)) - np.log1p(1 / (12.0 * k)) - np.log1p(1 / (12.0 * (n - k)))
    # -0.5 rather than -1/2: with Python 2 integer division ``-1/2`` is -1.
    return -0.5 * np.log(2 * np.pi) + A + B

K = 2.24e28
k = 2.24e27
N = 2.7e25

# NOTE(review): this expression matches the textbook mode of the hypergeometric
# distribution, i.e. the value of n at which P peaks -- not the peak value of P
# itself. A probability cannot exceed 1, so log(P) <= 0 and np.log(MAX) ~ 56
# is not a bound on l_P. Confirm the intended meaning.
MAX = (k + 1) * (N + 1) / (K + 2)
# Mathematical average of n.
AVG = N * k / K
# Mathematical standard deviation of n.
SD = np.sqrt(N * k * (K - N) * (K - k) / K ** 2 / (K - 1))

n = np.linspace(AVG - 50e12, AVG + 50e12, 1001)

# log(P) = log C(N, n) + log C(K - N, k - n) - log C(K, k).
# The last term does not depend on n, so it is evaluated once.
# NOTE(review): the individual terms are astronomically large while float64
# carries only ~16 significant digits, so their sum cannot resolve O(1)
# changes in log(P); this is the likely source of huge / jagged values.
log_denominator = l_cb(K, k)
l_P = np.array([l_cb(N, n_i) + l_cb(K - N, k - n_i) - log_denominator
                for n_i in n])

# Vertical marker lines at AVG, AVG - SD and AVG + SD (x axis is n / AVG).
y = np.linspace(-4e14, 5e14, len(n))
x_AVG = np.ones(len(n))
x_SD_L = np.ones(len(n)) - SD / AVG
x_SD_R = np.ones(len(n)) + SD / AVG

plt.plot(n / AVG, l_P, x_AVG, y, 'r', x_SD_L, y, 'k', x_SD_R, y, 'k')
plt.xlabel('n / AVG')
plt.ylabel('log(P)')
我想用IPython绘制一个超几何分布图。分布的概率函数包含三个二项式系数

由于我将在系数中输入的值非常大,例如1e28,我决定用我自己的函数来计算二项式系数,在这里我使用斯特林近似的二阶

由于二项式系数太大,无法放入变量直接相乘,因此我决定计算它们的对数,并将它们相加。为了获得最终概率,我只需将结果放入 exp 函数中。由于概率是相对“正常”大小(其最大值为2.7e24),因此应该不会再有问题了……然而问题还是出现了。

问题。

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

#Returns the log of "n choose k" calculated with 2nd order Stirling's approximation
def l_cb(n, k):
    """Return the natural log of "n choose k" via Stirling's approximation.

    Each factorial is replaced by the second-order Stirling formula
    ``ln(m!) ~ m*ln(m) - m + 0.5*ln(2*pi*m) + ln(1 + 1/(12*m))``; the ``-m``
    terms cancel in the binomial coefficient, leaving the ``A``, ``B`` and
    ``-0.5*ln(2*pi)`` terms computed below.

    Parameters
    ----------
    n, k : scalar numbers, expected to satisfy ``0 <= k <= n``.

    Returns
    -------
    float
        Approximation of ``ln(C(n, k))``.  ``0.0`` is returned for the exact
        cases ``k == 0`` and ``k == n``.  For invalid arguments (``k > n``,
        ``n < 0`` or ``k < 0``) a diagnostic is printed and ``0.0`` is
        returned, as before.

    Notes
    -----
    The result is a float64 with roughly 16 significant digits, so for very
    large arguments differences between results lose absolute accuracy.
    """
    # The second test used to repeat ``n < 0``; it is meant to reject negative k.
    if (k > n) or (n < 0) or (k < 0):
        # One pre-formatted string prints identically with the Python 2 print
        # statement and the Python 3 print function.
        print("Invalid values for the binomial coefficient: n = %s , k = %s ." % (n, k))
        return 0.0
    # C(n, 0) == C(n, n) == 1, whose log is exactly 0 (this also covers n == 0).
    if (k == n) or (k == 0):
        return 0.0
    A = (n + 0.5) * np.log(n) - (k + 0.5) * np.log(k) - (n - k + 0.5) * np.log(n - k)
    # log1p keeps the tiny 1/(12*m) corrections that 1 + x would round away.
    B = np.log1p(1 / (12.0 * n)) - np.log1p(1 / (12.0 * k)) - np.log1p(1 / (12.0 * (n - k)))
    # -0.5 rather than -1/2: with Python 2 integer division ``-1/2`` is -1.
    return -0.5 * np.log(2 * np.pi) + A + B

K = 2.24e28
k = 2.24e27
N = 2.7e25

# NOTE(review): this expression matches the textbook mode of the hypergeometric
# distribution, i.e. the value of n at which P peaks -- not the peak value of P
# itself. A probability cannot exceed 1, so log(P) <= 0 and np.log(MAX) ~ 56
# is not a bound on l_P. Confirm the intended meaning.
MAX = (k + 1) * (N + 1) / (K + 2)
# Mathematical average of n.
AVG = N * k / K
# Mathematical standard deviation of n.
SD = np.sqrt(N * k * (K - N) * (K - k) / K ** 2 / (K - 1))

n = np.linspace(AVG - 50e12, AVG + 50e12, 1001)

# log(P) = log C(N, n) + log C(K - N, k - n) - log C(K, k).
# The last term does not depend on n, so it is evaluated once.
# NOTE(review): the individual terms are astronomically large while float64
# carries only ~16 significant digits, so their sum cannot resolve O(1)
# changes in log(P); this is the likely source of huge / jagged values.
log_denominator = l_cb(K, k)
l_P = np.array([l_cb(N, n_i) + l_cb(K - N, k - n_i) - log_denominator
                for n_i in n])

# Vertical marker lines at AVG, AVG - SD and AVG + SD (x axis is n / AVG).
y = np.linspace(-4e14, 5e14, len(n))
x_AVG = np.ones(len(n))
x_SD_L = np.ones(len(n)) - SD / AVG
x_SD_R = np.ones(len(n)) + SD / AVG

plt.plot(n / AVG, l_P, x_AVG, y, 'r', x_SD_L, y, 'k', x_SD_R, y, 'k')
plt.xlabel('n / AVG')
plt.ylabel('log(P)')
我提到的“结果”是概率的对数,其值在-6.2e24到1.3e14之间,比它应该的大得多。相比之下,数学最大值的对数约为56

另一个问题是,放大时,绘图上的曲线非常参差不齐。缩小时,一切看起来都很好:曲线平滑,其最大值位于分布的平均值处:

但当我放大到平均值附近时(由于标准偏差非常小,概率函数的绝大部分峰值都集中在这里),我得到:

红线表示平均值,黑线表示平均值+/-标准偏差。 虽然它看起来很漂亮,但它不是我需要的平滑曲线

问题。

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

#Returns the log of "n choose k" calculated with 2nd order Stirling's approximation
def l_cb(n, k):
    """Return the natural log of "n choose k" via Stirling's approximation.

    Each factorial is replaced by the second-order Stirling formula
    ``ln(m!) ~ m*ln(m) - m + 0.5*ln(2*pi*m) + ln(1 + 1/(12*m))``; the ``-m``
    terms cancel in the binomial coefficient, leaving the ``A``, ``B`` and
    ``-0.5*ln(2*pi)`` terms computed below.

    Parameters
    ----------
    n, k : scalar numbers, expected to satisfy ``0 <= k <= n``.

    Returns
    -------
    float
        Approximation of ``ln(C(n, k))``.  ``0.0`` is returned for the exact
        cases ``k == 0`` and ``k == n``.  For invalid arguments (``k > n``,
        ``n < 0`` or ``k < 0``) a diagnostic is printed and ``0.0`` is
        returned, as before.

    Notes
    -----
    The result is a float64 with roughly 16 significant digits, so for very
    large arguments differences between results lose absolute accuracy.
    """
    # The second test used to repeat ``n < 0``; it is meant to reject negative k.
    if (k > n) or (n < 0) or (k < 0):
        # One pre-formatted string prints identically with the Python 2 print
        # statement and the Python 3 print function.
        print("Invalid values for the binomial coefficient: n = %s , k = %s ." % (n, k))
        return 0.0
    # C(n, 0) == C(n, n) == 1, whose log is exactly 0 (this also covers n == 0).
    if (k == n) or (k == 0):
        return 0.0
    A = (n + 0.5) * np.log(n) - (k + 0.5) * np.log(k) - (n - k + 0.5) * np.log(n - k)
    # log1p keeps the tiny 1/(12*m) corrections that 1 + x would round away.
    B = np.log1p(1 / (12.0 * n)) - np.log1p(1 / (12.0 * k)) - np.log1p(1 / (12.0 * (n - k)))
    # -0.5 rather than -1/2: with Python 2 integer division ``-1/2`` is -1.
    return -0.5 * np.log(2 * np.pi) + A + B

K = 2.24e28
k = 2.24e27
N = 2.7e25

# NOTE(review): this expression matches the textbook mode of the hypergeometric
# distribution, i.e. the value of n at which P peaks -- not the peak value of P
# itself. A probability cannot exceed 1, so log(P) <= 0 and np.log(MAX) ~ 56
# is not a bound on l_P. Confirm the intended meaning.
MAX = (k + 1) * (N + 1) / (K + 2)
# Mathematical average of n.
AVG = N * k / K
# Mathematical standard deviation of n.
SD = np.sqrt(N * k * (K - N) * (K - k) / K ** 2 / (K - 1))

n = np.linspace(AVG - 50e12, AVG + 50e12, 1001)

# log(P) = log C(N, n) + log C(K - N, k - n) - log C(K, k).
# The last term does not depend on n, so it is evaluated once.
# NOTE(review): the individual terms are astronomically large while float64
# carries only ~16 significant digits, so their sum cannot resolve O(1)
# changes in log(P); this is the likely source of huge / jagged values.
log_denominator = l_cb(K, k)
l_P = np.array([l_cb(N, n_i) + l_cb(K - N, k - n_i) - log_denominator
                for n_i in n])

# Vertical marker lines at AVG, AVG - SD and AVG + SD (x axis is n / AVG).
y = np.linspace(-4e14, 5e14, len(n))
x_AVG = np.ones(len(n))
x_SD_L = np.ones(len(n)) - SD / AVG
x_SD_R = np.ones(len(n)) + SD / AVG

plt.plot(n / AVG, l_P, x_AVG, y, 'r', x_SD_L, y, 'k', x_SD_R, y, 'k')
plt.xlabel('n / AVG')
plt.ylabel('log(P)')
有人能解释(1)为什么值如此之大,(2)为什么曲线参差不齐,以及我该如何修正它们吗?

代码。

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

#Returns the log of "n choose k" calculated with 2nd order Stirling's approximation
def l_cb(n, k):
    """Return the natural log of "n choose k" via Stirling's approximation.

    Each factorial is replaced by the second-order Stirling formula
    ``ln(m!) ~ m*ln(m) - m + 0.5*ln(2*pi*m) + ln(1 + 1/(12*m))``; the ``-m``
    terms cancel in the binomial coefficient, leaving the ``A``, ``B`` and
    ``-0.5*ln(2*pi)`` terms computed below.

    Parameters
    ----------
    n, k : scalar numbers, expected to satisfy ``0 <= k <= n``.

    Returns
    -------
    float
        Approximation of ``ln(C(n, k))``.  ``0.0`` is returned for the exact
        cases ``k == 0`` and ``k == n``.  For invalid arguments (``k > n``,
        ``n < 0`` or ``k < 0``) a diagnostic is printed and ``0.0`` is
        returned, as before.

    Notes
    -----
    The result is a float64 with roughly 16 significant digits, so for very
    large arguments differences between results lose absolute accuracy.
    """
    # The second test used to repeat ``n < 0``; it is meant to reject negative k.
    if (k > n) or (n < 0) or (k < 0):
        # One pre-formatted string prints identically with the Python 2 print
        # statement and the Python 3 print function.
        print("Invalid values for the binomial coefficient: n = %s , k = %s ." % (n, k))
        return 0.0
    # C(n, 0) == C(n, n) == 1, whose log is exactly 0 (this also covers n == 0).
    if (k == n) or (k == 0):
        return 0.0
    A = (n + 0.5) * np.log(n) - (k + 0.5) * np.log(k) - (n - k + 0.5) * np.log(n - k)
    # log1p keeps the tiny 1/(12*m) corrections that 1 + x would round away.
    B = np.log1p(1 / (12.0 * n)) - np.log1p(1 / (12.0 * k)) - np.log1p(1 / (12.0 * (n - k)))
    # -0.5 rather than -1/2: with Python 2 integer division ``-1/2`` is -1.
    return -0.5 * np.log(2 * np.pi) + A + B

K = 2.24e28
k = 2.24e27
N = 2.7e25

# NOTE(review): this expression matches the textbook mode of the hypergeometric
# distribution, i.e. the value of n at which P peaks -- not the peak value of P
# itself. A probability cannot exceed 1, so log(P) <= 0 and np.log(MAX) ~ 56
# is not a bound on l_P. Confirm the intended meaning.
MAX = (k + 1) * (N + 1) / (K + 2)
# Mathematical average of n.
AVG = N * k / K
# Mathematical standard deviation of n.
SD = np.sqrt(N * k * (K - N) * (K - k) / K ** 2 / (K - 1))

n = np.linspace(AVG - 50e12, AVG + 50e12, 1001)

# log(P) = log C(N, n) + log C(K - N, k - n) - log C(K, k).
# The last term does not depend on n, so it is evaluated once.
# NOTE(review): the individual terms are astronomically large while float64
# carries only ~16 significant digits, so their sum cannot resolve O(1)
# changes in log(P); this is the likely source of huge / jagged values.
log_denominator = l_cb(K, k)
l_P = np.array([l_cb(N, n_i) + l_cb(K - N, k - n_i) - log_denominator
                for n_i in n])

# Vertical marker lines at AVG, AVG - SD and AVG + SD (x axis is n / AVG).
y = np.linspace(-4e14, 5e14, len(n))
x_AVG = np.ones(len(n))
x_SD_L = np.ones(len(n)) - SD / AVG
x_SD_R = np.ones(len(n)) + SD / AVG

plt.plot(n / AVG, l_P, x_AVG, y, 'r', x_SD_L, y, 'k', x_SD_R, y, 'k')
plt.xlabel('n / AVG')
plt.ylabel('log(P)')
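import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

#Returns the log of "n choose k" calculated with 2nd order Stirling's approximation
def l_cb(n, k):
    if (k > n) or (n < 0) or (n < 0):
        print "Invalid values for the binomial coefficient:", "n =", n, ", k =", k, "."
        return 0.0
    if (k == n) or (k == 0) or (n == 0):
        return 0.0
    A = (n + 0.5) * np.log(n) - (k + 0.5) * np.log(k) - (n - k + 0.5) * np.log(n - k)
    B = np.log(1 + 1 / (12.0 * n)) - np.log(1 + 1 / (12.0 * k)) - np.log(1 + 1 / (12.0 * (n - k)))
    return - 1/2 * np.log(2 * np.pi) + A + B

K = 2.24e28
k = 2.24e27
N = 2.7e25

#Mathematical maximum of P. np.log(MAX) is about 56. l_P is way too big.
MAX = (k + 1) * (N + 1) / (K + 2)
#Mathematical average of n
AVG = N * k / K
#Mathematical standard deviation of P.
SD = np.sqrt(N * k * (K - N) * (K - k) / K ** 2 / (K - 1))

n = np.linspace(AVG - 50e12, AVG + 50e12, 1001)
l_P = np.zeros(len(n))

#Calculating log(P).
for i in xrange(len(l_P)):
    l_P[i] = l_cb(N, n[i]) + l_cb(K - N, k - n[i]) - l_cb(K, k)

#Marking AVG, AVG - SD, AVG + SD
y = np.linspace(-4e14, 5e14, len(n))
x_AVG = np.ones(len(n))
x_SD_L = np.ones(len(n)) - SD / AVG
x_SD_R = np.ones(len(n)) + SD / AVG

plt.plot(n / AVG, l_P, x_AVG, y, 'r', x_SD_L, y, 'k', x_SD_R, y, 'k')
plt.xlabel('n / AVG')
plt.ylabel('log(P)')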