Python 高效地为高密度区域创建密度图,为稀疏区域创建点

Python 高效地为高密度区域创建密度图,为稀疏区域创建点,python,matplotlib,Python,Matplotlib,我需要做一个绘图,它的功能类似于绘图上高密度区域的密度绘图,但低于某个阈值时使用单个点。我在matplotlib缩略图库或谷歌搜索中找不到任何与我需要的代码相似的现有代码。我有一个自己编写的工作代码,但它有点棘手,而且(更重要的是)当点数/箱数很大时,需要花费不可接受的长时间。代码如下: import numpy as np import math import matplotlib as mpl import matplotlib.pyplot as plt import pylab impo

我需要做一个绘图,它的功能类似于绘图上高密度区域的密度绘图,但低于某个阈值时使用单个点。我在matplotlib缩略图库或谷歌搜索中找不到任何与我需要的代码相似的现有代码。我有一个自己编写的工作代码,但它有点棘手,而且(更重要的是)当点数/箱数很大时,需要花费不可接受的长时间。代码如下:

import numpy as np
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab
import numpy.random

# Hybrid density/scatter plot (original, slow version): dense regions are drawn
# as a 2D histogram image, and points that fall in sparsely populated bins are
# drawn individually on top.
#
# NOTE: the "find all points" step below compares every point against every
# low-density bin, so its cost grows with npts * (number of low bins).  It is
# kept in that form on purpose -- this is the slow baseline being asked about.

#Create the colormap:
# Segment data for LinearSegmentedColormap: each channel is a list of
# (position, value_below, value_above) anchors.  Position 0.0 is pure white
# (1.0 in every channel) and the ramp jumps to light purple at 1e-6, so only
# the very bottom of the colour scale is white.
halfpurples = {'blue': [(0.0,1.0,1.0),(0.000001, 0.78431373834609985, 0.78431373834609985),
(0.25, 0.729411780834198, 0.729411780834198), (0.5,
0.63921570777893066, 0.63921570777893066), (0.75,
0.56078433990478516, 0.56078433990478516), (1.0, 0.49019607901573181,
0.49019607901573181)],

    'green': [(0.0,1.0,1.0),(0.000001,
    0.60392159223556519, 0.60392159223556519), (0.25,
    0.49019607901573181, 0.49019607901573181), (0.5,
    0.31764706969261169, 0.31764706969261169), (0.75,
    0.15294118225574493, 0.15294118225574493), (1.0, 0.0, 0.0)],

    'red': [(0.0,1.0,1.0),(0.000001,
    0.61960786581039429, 0.61960786581039429), (0.25,
    0.50196081399917603, 0.50196081399917603), (0.5,
    0.41568627953529358, 0.41568627953529358), (0.75,
    0.32941177487373352, 0.32941177487373352), (1.0,
    0.24705882370471954, 0.24705882370471954)]} 

halfpurplecmap = mpl.colors.LinearSegmentedColormap('halfpurples',halfpurples,256)

#Create x,y arrays of normally distributed points
npts = 1000
x = numpy.random.standard_normal(npts)
y = numpy.random.standard_normal(npts)

#Set bin numbers in both axes
nxbins = 25
nybins = 25

#Set the cutoff for resolving the individual points
minperbin = 1

#Make the density histogram
# y is passed first so that rows of H correspond to y and columns to x,
# which is the layout imshow expects.
H, yedges, xedges = np.histogram2d(y,x,bins=(nybins,nxbins))
#Reorient the axes
# Flip the rows so that row 0 holds the largest-y bin (imshow draws row 0 on top).
H =  H[::-1]

extent = [xedges[0],xedges[-1],yedges[0],yedges[-1]]

#Compute all bins where the density plot value is below (or equal to) the threshold
# For column i the x-range is xedges[i]..xedges[i+1].  Because H was flipped,
# row j covers yedges[-(j+2)]..yedges[-(j+1)].
lowxleftedges = [[xedges[i] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))] 
lowxrightedges = [[xedges[i+1] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))] 
lowyleftedges = [[yedges[-(j+2)] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]
lowyrightedges = [[yedges[-(j+1)] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]

#Flatten and convert to numpy array
# After flattening, index k of the four arrays describes the same low-density bin.
lowxleftedges = np.asarray([item for sublist in lowxleftedges for item in sublist])
lowxrightedges = np.asarray([item for sublist in lowxrightedges for item in sublist])
lowyleftedges = np.asarray([item for sublist in lowyleftedges for item in sublist])
lowyrightedges = np.asarray([item for sublist in lowyrightedges for item in sublist])

#Find all points that lie in these regions
# Every point is tested against every low-density bin (inclusive on all four
# sides); this nested scan is the expensive part of the script.
lowdatax = [[x[i] for j in range(len(lowxleftedges)) if lowxleftedges[j] <= x[i] and x[i] <= lowxrightedges[j] and lowyleftedges[j] <= y[i] and y[i] <= lowyrightedges[j]] for i in range(len(x))]
lowdatay = [[y[i] for j in range(len(lowyleftedges)) if lowxleftedges[j] <= x[i] and x[i] <= lowxrightedges[j] and lowyleftedges[j] <= y[i] and y[i] <= lowyrightedges[j]] for i in range(len(y))]

#Flatten and convert into numpy array
lowdatax = np.asarray([item for sublist in lowdatax for item in sublist])
lowdatay = np.asarray([item for sublist in lowdatay for item in sublist])

#Plot
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
# linestyle='none' draws markers only; '.' is a marker code, not a line style,
# and is rejected as a linestyle value.
ax1.plot(lowdatax,lowdatay,linestyle='none',marker='o',mfc='k',mec='k')
# vmin=minperbin puts the bottom of the colour scale at the threshold, so bins
# at or below it map to the white end of the colormap.
cp1 = ax1.imshow(H,interpolation='nearest',extent=extent,cmap=halfpurplecmap,vmin=minperbin)
fig1.colorbar(cp1)

fig1.savefig('contourtest.eps')
将numpy导入为np
输入数学
将matplotlib导入为mpl
将matplotlib.pyplot作为plt导入
进口派拉布
导入numpy.random
#创建颜色映射:
半紫色={'blue':[(0.0,1.0,1.0),(0.000001,0.78431373834609985,0.78431373834609985),
(0.25, 0.729411780834198, 0.729411780834198), (0.5,
0.63921570777893066, 0.63921570777893066), (0.75,
0.56078433990478516, 0.56078433990478516), (1.0, 0.49019607901573181,
0.49019607901573181)],
“绿色”:[(0.0,1.0,1.0),(0.000001,
0.60392159223556519, 0.60392159223556519), (0.25,
0.49019607901573181, 0.49019607901573181), (0.5,
0.31764706969261169, 0.31764706969261169), (0.75,
0.15294118225574493, 0.15294118225574493), (1.0, 0.0, 0.0)],
“红色”:[(0.0,1.0,1.0),(0.000001,
0.61960786581039429, 0.61960786581039429), (0.25,
0.50196081399917603, 0.50196081399917603), (0.5,
0.41568627953529358, 0.41568627953529358), (0.75,
0.32941177487373352, 0.32941177487373352), (1.0,
0.24705882370471954, 0.24705882370471954)]} 
HalfPurpleMap=mpl.colors.LinearSegmentedColormap('halfpurples',halfpurples,256)
#创建正态分布点的x、y阵列
净现值=1000
x=numpy.random.standard_normal(npts)
y=numpy.随机.标准_正常(npts)
#在两个轴上设置料仓编号
nxbins=25
尼宾斯=25
#设置用于解析单个点的截止点
minperbin=1
#编制密度历史程序
H、 yedges,xedges=np.histogram2d(y,x,bins=(nybins,nxbins))
#调整轴的方向
H=H[:-1]
范围=[xedges[0],xedges[-1],yedges[0],yedges[-1]]
#计算密度图值低于(或等于)阈值的所有箱子

lowxleftedges = [[xedges[i] for j in range(len(H[:,i])) if H[j,i] <= minperbin] for i in range(len(H[0,:]))]

您的问题在于算法是二次复杂度的——当 npts=1000 时,需要比较的规模就达到 10^6 量级,而您又用列表推导式逐个遍历这些列表。
当然,这在一定程度上是个人喜好问题,但我发现列表推导式有时会让代码变得完全难以读懂,而它们往往只是稍微快一点……不过这并不是我的重点。
我的观点是,对于大型阵列操作,有如下numpy函数:

np.where, np.choose etc.
请注意,您可以用 NumPy 实现列表推导式所做的事情,而且代码应该会运行得更快。

我是否正确理解你的评论

#Find all points that lie in these regions

你是在测试一个点是否位于多边形内吗?如果是这样,可以考虑使用 matplotlib。

睡了一夜,并仔细看了 OZ123 的建议之后,我想明白了。诀窍是先计算每个 (x, y) 点落在哪个箱子 (xi, yi) 中,然后测试 H[xi, yi](实际上,在我的情况下是 H[yi, xi])是否低于阈值。代码如下,对于大量的点运行速度非常快,而且更简洁:

import numpy as np
import math
import matplotlib as mpl
import matplotlib.pyplot as plt
import pylab
import numpy.random

# Hybrid density/scatter plot (vectorised version): dense regions are drawn as
# a 2D histogram image, and points that fall in bins holding at most
# `minperbin` points are drawn individually on top.  Each point's bin index is
# computed directly, so no point-vs-bin search is needed.

#Create the colormap:
# Segment data for LinearSegmentedColormap: each channel is a list of
# (position, value_below, value_above) anchors.  Position 0.0 is pure white
# (1.0 in every channel) and the ramp jumps to light purple at 1e-6.
halfpurples = {'blue': [(0.0,1.0,1.0),(0.000001, 0.78431373834609985, 0.78431373834609985),
(0.25, 0.729411780834198, 0.729411780834198), (0.5,
0.63921570777893066, 0.63921570777893066), (0.75,
0.56078433990478516, 0.56078433990478516), (1.0, 0.49019607901573181,
0.49019607901573181)],

    'green': [(0.0,1.0,1.0),(0.000001,
    0.60392159223556519, 0.60392159223556519), (0.25,
    0.49019607901573181, 0.49019607901573181), (0.5,
    0.31764706969261169, 0.31764706969261169), (0.75,
    0.15294118225574493, 0.15294118225574493), (1.0, 0.0, 0.0)],

    'red': [(0.0,1.0,1.0),(0.000001,
    0.61960786581039429, 0.61960786581039429), (0.25,
    0.50196081399917603, 0.50196081399917603), (0.5,
    0.41568627953529358, 0.41568627953529358), (0.75,
    0.32941177487373352, 0.32941177487373352), (1.0,
    0.24705882370471954, 0.24705882370471954)]} 

halfpurplecmap = mpl.colors.LinearSegmentedColormap('halfpurples',halfpurples,256)

#Create x,y arrays of normally distributed points
npts = 100000
x = numpy.random.standard_normal(npts)
y = numpy.random.standard_normal(npts)

#Set bin numbers in both axes
nxbins = 100
nybins = 100

#Set the cutoff for resolving the individual points
minperbin = 1

#Make the density histogram
# y is passed first so that rows of H correspond to y and columns to x.
H, yedges, xedges = np.histogram2d(y,x,bins=(nybins,nxbins))
#Reorient the axes
# Flip the rows so that row 0 holds the largest-y bin (imshow draws row 0 on top).
H =  H[::-1]

extent = [xedges[0],xedges[-1],yedges[0],yedges[-1]]

#Figure out which bin each x,y point is in
# The bins are equal-width (only bin counts were given to histogram2d), so the
# index is the offset from the first edge divided by the bin width, truncated.
xbinsize = xedges[1]-xedges[0]
ybinsize = yedges[1]-yedges[0]
xi = ((x-xedges[0])/xbinsize).astype(int)
yi = ((y-yedges[0])/ybinsize).astype(int)

#Pull points lying exactly on the right and upper edges back into the last bin
# The maximum x / y value sits on the outermost edge and would otherwise get
# index nxbins / nybins.  This must happen BEFORE the y index is flipped:
# after flipping, the overflow shows up as -1, which silently wraps around to
# the wrong row of H.
xi = np.where(xi < nxbins,xi,nxbins-1)
yi = np.where(yi < nybins,yi,nybins-1)

# Convert the y bin index into a row index of the flipped histogram.
yi = nybins-1-yi

#Get all points with density below the threshold
lowdensity = H[yi,xi] <= minperbin
lowdensityx = x[lowdensity]
lowdensityy = y[lowdensity]

#Plot
fig1 = plt.figure()
ax1 = fig1.add_subplot(111)
# linestyle='none' draws markers only; '.' is a marker code, not a line style,
# and is rejected as a linestyle value.
ax1.plot(lowdensityx,lowdensityy,linestyle='none',marker='o',mfc='k',mec='k',ms=3)
# vmin=minperbin puts the bottom of the colour scale at the threshold, so bins
# at or below it map to the white end of the colormap.
cp1 = ax1.imshow(H,interpolation='nearest',extent=extent,cmap=halfpurplecmap,vmin=minperbin)
fig1.colorbar(cp1)

fig1.savefig('contourtest.eps')
将numpy导入为np
输入数学
将matplotlib导入为mpl
将matplotlib.pyplot作为plt导入
进口派拉布
导入numpy.random
#创建颜色映射:
半紫色={'blue':[(0.0,1.0,1.0),(0.000001,0.78431373834609985,0.78431373834609985),
0.25, 0.729411780834198, 0.729411780834198), (0.5,
0.63921570777893066, 0.63921570777893066), (0.75,
0.56078433990478516, 0.56078433990478516), (1.0, 0.49019607901573181,
0.49019607901573181)],
“绿色”:[(0.0,1.0,1.0),(0.000001,
0.60392159223556519, 0.60392159223556519), (0.25,
0.49019607901573181, 0.49019607901573181), (0.5,
0.31764706969261169, 0.31764706969261169), (0.75,
0.15294118225574493, 0.15294118225574493), (1.0, 0.0, 0.0)],
“红色”:[(0.0,1.0,1.0),(0.000001,
0.61960786581039429, 0.61960786581039429), (0.25,
0.50196081399917603, 0.50196081399917603), (0.5,
0.41568627953529358, 0.41568627953529358), (0.75,
0.32941177487373352, 0.32941177487373352), (1.0,
0.24705882370471954, 0.24705882370471954)]} 
HalfPurpleMap=mpl.colors.LinearSegmentedColormap('halfpurples',halfpurples,256)
#创建正态分布点的x、y阵列
净现值=100000
x=numpy.random.standard_normal(npts)
y=numpy.随机.标准_正常(npts)
#在两个轴上设置料仓编号
nxbins=100
尼宾斯=100
#设置用于解析单个点的截止点
minperbin=1
#编制密度历史程序
H、 yedges,xedges=np.histogram2d(y,x,bins=(nybins,nxbins))
#调整轴的方向
H=H[:-1]
范围=[xedges[0],xedges[-1],yedges[0],yedges[-1]]
#找出每个x,y点所在的箱子
xbinsize=xedges[1]-xedges[0]
ybinsize=yedges[1]-yedges[0]
席=((X-XREST(0))/xBIZSIZE)astype(NP整数)
yi=nybins-1-((y-yedges[0])/ybinsize).astype(np.integer)
#从恰好位于区域右边缘和上边缘的任何点中减去一个
xim1=xi-1
yim1=yi-1
X= NP(其中,席席NXBIs,席,XIM1)
yi=np.where(yi
import matplotlib.pyplot as plt, numpy as np, numpy.random, scipy

# Hybrid density/scatter plot using np.digitize: bins with at least `thresh`
# points are shown as an image, points in sparser bins are plotted one by one.

#histogram definition
xyrange = [[-5,5],[-5,5]] # data range
bins = [100,100] # number of bins
thresh = 3  #density threshold

#data definition
# The sample count must be an int: numpy's random generators reject a float size.
N = 100000
xdat, ydat = np.random.normal(size=N), np.random.normal(1, 0.6, size=N)

# histogram the data
# np.histogram2d is called directly; the top-level scipy alias of this
# function is not available in current SciPy releases.
hh, locx, locy = np.histogram2d(xdat, ydat, range=xyrange, bins=bins)
# digitize returns, for each value, the 1-based index of the bin it falls in;
# 0 and len(edges) mark values outside the histogram range.
posx = np.digitize(xdat, locx)
posy = np.digitize(ydat, locy)

#select points within the histogram
ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1])
hhsub = hh[posx[ind] - 1, posy[ind] - 1] # values of the histogram where the points are
xdat1 = xdat[ind][hhsub < thresh] # low density points
ydat1 = ydat[ind][hhsub < thresh]
hh[hh < thresh] = np.nan # fill the areas with low density by NaNs

# hh is indexed [x, y]; transposing puts y on the rows, and flipud together
# with origin='upper' places the largest y at the top of the image.
plt.imshow(np.flipud(hh.T),cmap='jet',extent=np.array(xyrange).flatten(), interpolation='none', origin='upper')
plt.colorbar()   
plt.plot(xdat1, ydat1, '.',color='darkblue')
plt.show()
导入matplotlib.pyplot作为plt,numpy作为np,numpy.random,scipy
#直方图定义
xyrange=[-5,5],-5,5]]#数据范围
箱子=[100100]#箱子数量
阈值=3#密度阈值
#数据定义
N=1e5;
xdat,ydat=np.random.normal(大小=N),np.random.normal(1,0.6,大小=N)
#对数据进行直方图分析
hh,locx,locy=scipy.historogram2d(xdat,ydat,range=xyrange,bins=bins)
posx=np.数字化(xdat,locx)
posy=np.数字化(ydat,locy)
#选择直方图中的点

ind = (posx > 0) & (posx <= bins[0]) & (posy > 0) & (posy <= bins[1])

作为记录,下面是使用 scipy.stats.gaussian_kde(核密度估计)而不是二维直方图的一次新尝试的结果。可以根据不同的用途,设想不同的色彩网格与等高线的组合。

import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde

# Hybrid density/scatter plot based on a Gaussian kernel density estimate:
# points whose estimated density is at or below `threshold` are scattered
# individually, and the density itself is drawn as an image with a contour
# marking the threshold level.

# parameters
npts = 5000         # number of sample points
bins = 100          # number of grid points per axis in the density map
threshold = 0.01    # density threshold for scatter plot

# initialize figure
fig, ax = plt.subplots()

# create a random dataset
# Two Gaussian clusters of equal size.  Integer division is required here:
# in Python 3 `npts/2` is a float, which is rejected as a sample count.
x1, y1 = np.random.multivariate_normal([0, 0], [[1, 0], [0, 1]], npts // 2).T
x2, y2 = np.random.multivariate_normal([4, 4], [[4, 0], [0, 1]], npts // 2).T
x = np.hstack((x1, x2))
y = np.hstack((y1, y2))
points = np.vstack([x, y])

# perform kernel density estimate
kde = gaussian_kde(points)
z = kde(points)     # estimated density at every sample point

# mask points above density threshold
x = np.ma.masked_where(z > threshold, x)
y = np.ma.masked_where(z > threshold, y)

# plot unmasked points
ax.scatter(x, y, c='black', marker='.')

# get bounds from axes
# The limits set by the scatter call define the area covered by the density map.
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()

# prepare grid for density map
xedges = np.linspace(xmin, xmax, bins)
yedges = np.linspace(ymin, ymax, bins)
xx, yy = np.meshgrid(xedges, yedges)
gridpoints = np.array([xx.ravel(), yy.ravel()])

# compute density map
zz = np.reshape(kde(gridpoints), xx.shape)

# plot density map
im = ax.imshow(zz, cmap='CMRmap_r', interpolation='nearest',
               origin='lower', extent=[xmin, xmax, ymin, ymax])

# plot threshold contour
cs = ax.contour(xx, yy, zz, levels=[threshold], colors='black')

# show
fig.colorbar(im)
plt.show()

很好,这是最好的选择