Python 高效地为高密度区域创建密度图,为稀疏区域创建点
我需要绘制这样一种图:在点密度高的区域,它的表现类似于密度图;而当密度低于某个阈值时,则改为绘制单个的点。我在 matplotlib 的缩略图库(thumbnail gallery)和谷歌搜索中都没有找到与我的需求类似的现成代码。我自己写了一份可以工作的代码,但它有些取巧,而且(更重要的是)当点数或箱数很大时,运行时间长得无法接受。(标签:python、matplotlib)代码如下:
import numpy as np
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab
import numpy.random
# Question script: draw a 2D-histogram density image and overlay the individual
# points that fall in bins whose count is at or below `minperbin`.

# Create the colormap: a purple ramp whose very first entry (0.0) is pure white,
# so the bottom of the colour scale blends into the background.
halfpurples = {
    'blue': [(0.0, 1.0, 1.0),
             (0.000001, 0.78431373834609985, 0.78431373834609985),
             (0.25, 0.729411780834198, 0.729411780834198),
             (0.5, 0.63921570777893066, 0.63921570777893066),
             (0.75, 0.56078433990478516, 0.56078433990478516),
             (1.0, 0.49019607901573181, 0.49019607901573181)],
    'green': [(0.0, 1.0, 1.0),
              (0.000001, 0.60392159223556519, 0.60392159223556519),
              (0.25, 0.49019607901573181, 0.49019607901573181),
              (0.5, 0.31764706969261169, 0.31764706969261169),
              (0.75, 0.15294118225574493, 0.15294118225574493),
              (1.0, 0.0, 0.0)],
    'red': [(0.0, 1.0, 1.0),
            (0.000001, 0.61960786581039429, 0.61960786581039429),
            (0.25, 0.50196081399917603, 0.50196081399917603),
            (0.5, 0.41568627953529358, 0.41568627953529358),
            (0.75, 0.32941177487373352, 0.32941177487373352),
            (1.0, 0.24705882370471954, 0.24705882370471954)],
}
halfpurplecmap = mpl.colors.LinearSegmentedColormap('halfpurples', halfpurples, 256)

# Create x,y arrays of normally distributed points
npts = 1000
x = numpy.random.standard_normal(npts)
y = numpy.random.standard_normal(npts)

# Set bin numbers in both axes
nxbins = 25
nybins = 25

# Set the cutoff for resolving the individual points
minperbin = 1

# Make the density histogram (first axis of H is y, second is x)
H, yedges, xedges = np.histogram2d(y, x, bins=(nybins, nxbins))

# Reorient the axes: reverse the rows so row 0 holds the highest y bin,
# which is how imshow draws an array with its default origin.
H = H[::-1]
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

# Compute all bins where the density plot value is below (or equal to) the
# threshold. Because H was reversed along y, row j corresponds to the y bin
# between yedges[-(j+2)] and yedges[-(j+1)].
lowxleftedges = [[xedges[i] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]
lowxrightedges = [[xedges[i+1] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]
lowyleftedges = [[yedges[-(j+2)] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]
lowyrightedges = [[yedges[-(j+1)] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]

# Flatten and convert to numpy array
lowxleftedges = np.asarray([item for sublist in lowxleftedges for item in sublist])
lowxrightedges = np.asarray([item for sublist in lowxrightedges for item in sublist])
lowyleftedges = np.asarray([item for sublist in lowyleftedges for item in sublist])
lowyrightedges = np.asarray([item for sublist in lowyrightedges for item in sublist])

# Find all points that lie in these regions.
# NOTE: this is the slow step the question is about -- every point is compared
# against every low-density bin, so the work grows as
# npts * (number of low-density bins).
lowdatax = [[x[i] for j in range(len(lowxleftedges)) if lowxleftedges[j] <= x[i] and x[i] <= lowxrightedges[j] and lowyleftedges[j] <= y[i] and y[i] <= lowyrightedges[j]] for i in range(len(x))]
lowdatay = [[y[i] for j in range(len(lowyleftedges)) if lowxleftedges[j] <= x[i] and x[i] <= lowxrightedges[j] and lowyleftedges[j] <= y[i] and y[i] <= lowyrightedges[j]] for i in range(len(y))]

# Flatten and convert into numpy array
lowdatax = np.asarray([item for sublist in lowdatax for item in sublist])
lowdatay = np.asarray([item for sublist in lowdatay for item in sublist])

# Plot
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
# linestyle='none' draws markers only; the original '.' is a marker code, not
# a line style, and is rejected by current matplotlib.
ax1.plot(lowdatax, lowdatay, linestyle='none', marker='o', mfc='k', mec='k')
# vmin=minperbin places the bottom (white) end of the colormap at the threshold.
cp1 = ax1.imshow(H, interpolation='nearest', extent=extent, cmap=halfpurplecmap, vmin=minperbin)
fig1.colorbar(cp1)
fig1.savefig('contourtest.eps')
将numpy导入为np
输入数学
将matplotlib导入为mpl
将matplotlib.pyplot作为plt导入
进口派拉布
导入numpy.random
#创建颜色映射:
半紫色={'blue':[(0.0,1.0,1.0),(0.000001,0.78431373834609985,0.78431373834609985),
(0.25, 0.729411780834198, 0.729411780834198), (0.5,
0.63921570777893066, 0.63921570777893066), (0.75,
0.56078433990478516, 0.56078433990478516), (1.0, 0.49019607901573181,
0.49019607901573181)],
“绿色”:[(0.0,1.0,1.0),(0.000001,
0.60392159223556519, 0.60392159223556519), (0.25,
0.49019607901573181, 0.49019607901573181), (0.5,
0.31764706969261169, 0.31764706969261169), (0.75,
0.15294118225574493, 0.15294118225574493), (1.0, 0.0, 0.0)],
“红色”:[(0.0,1.0,1.0),(0.000001,
0.61960786581039429, 0.61960786581039429), (0.25,
0.50196081399917603, 0.50196081399917603), (0.5,
0.41568627953529358, 0.41568627953529358), (0.75,
0.32941177487373352, 0.32941177487373352), (1.0,
0.24705882370471954, 0.24705882370471954)]}
HalfPurpleMap=mpl.colors.LinearSegmentedColormap('halfpurples',halfpurples,256)
#创建正态分布点的x、y阵列
净现值=1000
x=numpy.random.standard_normal(npts)
y=numpy.随机.标准_正常(npts)
#在两个轴上设置料仓编号
nxbins=25
尼宾斯=25
#设置用于解析单个点的截止点
minperbin=1
#编制密度历史程序
H、 yedges,xedges=np.histogram2d(y,x,bins=(nybins,nxbins))
#调整轴的方向
H=H[:-1]
范围=[xedges[0],xedges[-1],yedges[0],yedges[-1]]
#计算密度图值低于(或等于)阈值的所有箱子
lowxleftedges = [[xedges[i] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]
您的问题在于算法的复杂度是二次的——当 npts = 1000 时,数组规模会达到 10^6 个元素,而您又用列表推导式去遍历这些列表。
当然,这在一定程度上是个人喜好问题,但我发现列表推导式可能会写出完全难以理解的代码,而且有时它们只是稍微快一点……不过这并不是我想说的重点。
我想说的是,对于大型数组的操作,numpy 提供了如下这些函数:
np.where, np.choose etc.
请注意,您可以用 NumPy 实现与列表推导式相同的功能,而且代码运行起来应该会快得多。
我对您下面这条注释的理解是否正确:
#Find all points that lie in these regions
您是在测试点是否位于多边形内吗?如果是这样,可以考虑使用 matplotlib 中现成的相关函数。

睡了一觉之后,再结合 OZ123 的建议,我想明白了。诀窍是先计算每个 (x, y) 点落入哪个箱子 (xi, yi),然后测试 H[xi, yi](实际上在我的代码里是 H[yi, xi])是否低于阈值。代码如下,对于大量的点运行得非常快,而且也更简洁:
import numpy as np
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab
import numpy.random
# Answer script: same plot as the question, but each point's bin is looked up
# directly instead of comparing every point against every low-density bin.

# Create the colormap: a purple ramp whose very first entry (0.0) is pure white,
# so the bottom of the colour scale blends into the background.
halfpurples = {
    'blue': [(0.0, 1.0, 1.0),
             (0.000001, 0.78431373834609985, 0.78431373834609985),
             (0.25, 0.729411780834198, 0.729411780834198),
             (0.5, 0.63921570777893066, 0.63921570777893066),
             (0.75, 0.56078433990478516, 0.56078433990478516),
             (1.0, 0.49019607901573181, 0.49019607901573181)],
    'green': [(0.0, 1.0, 1.0),
              (0.000001, 0.60392159223556519, 0.60392159223556519),
              (0.25, 0.49019607901573181, 0.49019607901573181),
              (0.5, 0.31764706969261169, 0.31764706969261169),
              (0.75, 0.15294118225574493, 0.15294118225574493),
              (1.0, 0.0, 0.0)],
    'red': [(0.0, 1.0, 1.0),
            (0.000001, 0.61960786581039429, 0.61960786581039429),
            (0.25, 0.50196081399917603, 0.50196081399917603),
            (0.5, 0.41568627953529358, 0.41568627953529358),
            (0.75, 0.32941177487373352, 0.32941177487373352),
            (1.0, 0.24705882370471954, 0.24705882370471954)],
}
halfpurplecmap = mpl.colors.LinearSegmentedColormap('halfpurples', halfpurples, 256)

# Create x,y arrays of normally distributed points
npts = 100000
x = numpy.random.standard_normal(npts)
y = numpy.random.standard_normal(npts)

# Set bin numbers in both axes
nxbins = 100
nybins = 100

# Set the cutoff for resolving the individual points
minperbin = 1

# Make the density histogram (first axis of H is y, second is x)
H, yedges, xedges = np.histogram2d(y, x, bins=(nybins, nxbins))

# Reorient the axes: reverse the rows so row 0 holds the highest y bin,
# which is how imshow draws an array with its default origin.
H = H[::-1]
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]

# Figure out which bin each x,y point is in by locating it among the histogram
# edges. searchsorted(..., side='right') - 1 gives the index of the bin whose
# left edge is <= the value; a value sitting exactly on the last edge would get
# index nbins, so clip it back into the last bin (histogram2d counts it there).
xcol = np.clip(np.searchsorted(xedges, x, side='right') - 1, 0, nxbins - 1)
yrow_unflipped = np.clip(np.searchsorted(yedges, y, side='right') - 1, 0, nybins - 1)
xi = xcol
# Flip the row index only AFTER clipping, to match H = H[::-1] above. Flipping
# first would turn the top-edge point into index -1, i.e. the wrong row.
yi = nybins - 1 - yrow_unflipped

# Get all points with density below (or equal to) the threshold
lowdensity = H[yi, xi] <= minperbin
lowdensityx = x[lowdensity]
lowdensityy = y[lowdensity]

# Plot
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
# linestyle='none' draws markers only; the original '.' is a marker code, not
# a line style, and is rejected by current matplotlib.
ax1.plot(lowdensityx, lowdensityy, linestyle='none', marker='o', mfc='k', mec='k', ms=3)
# vmin=minperbin places the bottom (white) end of the colormap at the threshold.
cp1 = ax1.imshow(H, interpolation='nearest', extent=extent, cmap=halfpurplecmap, vmin=minperbin)
fig1.colorbar(cp1)
fig1.savefig('contourtest.eps')
将numpy导入为np
输入数学
将matplotlib导入为mpl
将matplotlib.pyplot作为plt导入
进口派拉布
导入numpy.random
#创建颜色映射:
半紫色={'blue':[(0.0,1.0,1.0),(0.000001,0.78431373834609985,0.78431373834609985),
0.25, 0.729411780834198, 0.729411780834198), (0.5,
0.63921570777893066, 0.63921570777893066), (0.75,
0.56078433990478516, 0.56078433990478516), (1.0, 0.49019607901573181,
0.49019607901573181)],
“绿色”:[(0.0,1.0,1.0),(0.000001,
0.60392159223556519, 0.60392159223556519), (0.25,
0.49019607901573181, 0.49019607901573181), (0.5,
0.31764706969261169, 0.31764706969261169), (0.75,
0.15294118225574493, 0.15294118225574493), (1.0, 0.0, 0.0)],
“红色”:[(0.0,1.0,1.0),(0.000001,
0.61960786581039429, 0.61960786581039429), (0.25,
0.50196081399917603, 0.50196081399917603), (0.5,
0.41568627953529358, 0.41568627953529358), (0.75,
0.32941177487373352, 0.32941177487373352), (1.0,
0.24705882370471954, 0.24705882370471954)]}
HalfPurpleMap=mpl.colors.LinearSegmentedColormap('halfpurples',halfpurples,256)
#创建正态分布点的x、y阵列
净现值=100000
x=numpy.random.standard_normal(npts)
y=numpy.随机.标准_正常(npts)
#在两个轴上设置料仓编号
nxbins=100
尼宾斯=100
#设置用于解析单个点的截止点
minperbin=1
#编制密度历史程序
H、 yedges,xedges=np.histogram2d(y,x,bins=(nybins,nxbins))
#调整轴的方向
H=H[:-1]
范围=[xedges[0],xedges[-1],yedges[0],yedges[-1]]
#找出每个x,y点所在的箱子
xbinsize=xedges[1]-xedges[0]
ybinsize=yedges[1]-yedges[0]
席=((X-XREST(0))/xBIZSIZE)astype(NP整数)
yi=nybins-1-((y-yedges[0])/ybinsize).astype(np.integer)
#从恰好位于区域右边缘和上边缘的任何点中减去一个
xim1=xi-1
yim1=yi-1
X= NP(其中,席席NXBIs,席,XIM1)
yi=np.where(yi
import matplotlib.pyplot as plt, numpy as np, numpy.random, scipy
# Density image for well-populated bins, individual dots for sparse bins,
# using np.digitize to find the histogram bin of every data point.

# histogram definition
xyrange = [[-5, 5], [-5, 5]]  # data range
bins = [100, 100]  # number of bins
thresh = 3  # density threshold

# data definition
N = 100000  # must be an int: NumPy does not accept a float (1e5) as a size
xdat, ydat = np.random.normal(size=N), np.random.normal(1, 0.6, size=N)

# histogram the data (np.histogram2d; the old scipy.histogram2d alias is gone)
hh, locx, locy = np.histogram2d(xdat, ydat, range=xyrange, bins=bins)
# digitize returns 1-based bin numbers; 0 and len(edges) mean "outside the range"
posx = np.digitize(xdat, locx)
posy = np.digitize(ydat, locy)

# select points within the histogram
ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1])
hhsub = hh[posx[ind] - 1, posy[ind] - 1]  # values of the histogram where the points are
xdat1 = xdat[ind][hhsub < thresh]  # low density points
ydat1 = ydat[ind][hhsub < thresh]
hh[hh < thresh] = np.nan  # fill the areas with low density by NaNs

# hh is indexed [x, y]; transpose and flip it so that imshow (origin='upper')
# shows x horizontally and y increasing upwards.
plt.imshow(np.flipud(hh.T), cmap='jet', extent=np.array(xyrange).flatten(),
           interpolation='none', origin='upper')
plt.colorbar()
plt.plot(xdat1, ydat1, '.', color='darkblue')
plt.show()
导入matplotlib.pyplot作为plt,numpy作为np,numpy.random,scipy
#直方图定义
xyrange=[-5,5],-5,5]]#数据范围
箱子=[100100]#箱子数量
阈值=3#密度阈值
#数据定义
N=1e5;
xdat,ydat=np.random.normal(大小=N),np.random.normal(1,0.6,大小=N)
#对数据进行直方图分析
hh,locx,locy=scipy.historogram2d(xdat,ydat,range=xyrange,bins=bins)
posx=np.数字化(xdat,locx)
posy=np.数字化(ydat,locy)
#选择直方图中的点
ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1])
作为记录,下面是一次新尝试的结果:这次使用 scipy.stats.gaussian_kde(核密度估计),而不是二维直方图。
根据不同的用途,还可以设想颜色网格与等高线的其他组合方式。
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde
# Scatter the low-density points and draw a kernel-density-estimate map
# (plus a contour at the threshold) for the rest.

# parameters
npts = 5000       # number of sample points
bins = 100        # number of bins in density maps
threshold = 0.01  # density threshold for scatter plot

# initialize figure
fig, ax = plt.subplots()

# create a random dataset: two Gaussian clusters with half the points each.
# Integer division is required -- the sample count must be an int, and
# npts / 2 is a float under Python 3.
x1, y1 = np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]], npts // 2).T
x2, y2 = np.random.multivariate_normal([4, 4], [[4, 0], [0, 1]], npts // 2).T
x = np.hstack((x1, x2))
y = np.hstack((y1, y2))
points = np.vstack([x, y])

# perform kernel density estimate and evaluate it at every sample point
kde = gaussian_kde(points)
z = kde(points)

# mask points above density threshold
x = np.ma.masked_where(z > threshold, x)
y = np.ma.masked_where(z > threshold, y)

# plot unmasked points
ax.scatter(x, y, c='black', marker='.')

# get bounds from axes
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()

# prepare grid for density map
xedges = np.linspace(xmin, xmax, bins)
yedges = np.linspace(ymin, ymax, bins)
xx, yy = np.meshgrid(xedges, yedges)
gridpoints = np.array([xx.ravel(), yy.ravel()])

# compute density map
zz = np.reshape(kde(gridpoints), xx.shape)

# plot density map
im = ax.imshow(zz, cmap='CMRmap_r', interpolation='nearest',
               origin='lower', extent=[xmin, xmax, ymin, ymax])

# plot threshold contour
cs = ax.contour(xx, yy, zz, levels=[threshold], colors='black')

# show
fig.colorbar(im)
plt.show()
很好,这是最好的选择