Python 有效地计算邻居之间的距离
标签:python, algorithm, numpy, scipy, spatial

我的数据在地理上是分散分布的,没有任何规律。我需要生成一幅图像,其中每个像素的值是该像素周围 X 米范围内所有数据点的平均值。为此,我使用 scipy.spatial 库构建包含这些数据的 KDTree(cKDTree)。数据结构生成之后,我对每个像素进行地理定位,并查找它附近的地理点。
# Build a raster in which every pixel holds the mean value of the samples
# lying within `radius` of it (0 where no sample is in range).

# Scattered samples as rows of (y, x, value).
coord_cart = [
    [
        feat.geometry().GetY(),
        feat.geometry().GetX(),
        feat.GetField(feature),
    ]
    for feat in layer
]
# NOTE(review): assumes the field is numeric — confirm against the layer schema.
samples = np.asarray(coord_cart, dtype=float).reshape(-1, 3)
sample_yx, sample_values = samples[:, :2], samples[:, 2]

# Index the positions only: the value column is not a spatial coordinate, and
# the tree must have the same dimensionality as the 2-D pixel queries below.
tree = cKDTree(sample_yx)

# Raster dimensions derived from the layer extent.
pixel_size = 5
source_layer = shapefile.GetLayer()
x_min, x_max, y_min, y_max = source_layer.GetExtent()
x_res = int((x_max - x_min) / pixel_size)
y_res = int((y_max - y_min) / pixel_size)

# Pixel coordinates as an (n_pixels, 2) array in the same (y, x) order as the
# samples. column_stack is used because zip() is a lazy iterator on Python 3
# and np.array(zip(...)) would not produce a coordinate array.
x = np.linspace(x_min, x_max, x_res)
y = np.linspace(y_min, y_max, y_res)
X, Y = np.meshgrid(x, y)
grid = np.column_stack((Y.ravel(), X.ravel()))

# For every pixel, the indices (into the samples) of all samples in range.
radius = 10
inds = tree.query_ball_point(grid, radius)

# The neighbour lists are ragged, so flatten them into parallel
# (pixel index, sample index) pairs instead of padding to a rectangle.
ll = np.array([len(neighbours) for neighbours in inds], dtype=int)
pixel_idx = np.repeat(np.arange(len(inds)), ll)
sample_idx = np.fromiter(
    (j for neighbours in inds for j in neighbours),
    dtype=int,
    count=int(ll.sum()),
)

# Distance of every (pixel, neighbouring sample) pair.
d = np.linalg.norm(grid[pixel_idx] - sample_yx[sample_idx], axis=1)

# Per-pixel mean of the neighbouring values; pixels without neighbours stay 0.
sums = np.bincount(
    pixel_idx, weights=sample_values[sample_idx], minlength=len(inds)
)
mean = np.divide(sums, ll, out=np.zeros(len(inds)), where=ll > 0)
image = mean.reshape(Y.shape)
有更好的方法吗?我正在尝试使用反距离加权(IDW)在点之间进行插值。我找到了一个实现,它用一个函数获取最近的 N 个点,但这对我不适用,因为我需要的是:如果半径 R 内没有任何点,则该像素的值为 0。
# Inverse-distance-weighted (IDW) interpolation from the 10 nearest samples.
# The query points are stacked into an explicit (n_points, 3) array: on
# Python 3 a bare zip() is a lazy iterator and cannot be converted to
# coordinates.
# NOTE(review): assumes xt, yt and zt are 1-D and of equal length — confirm.
d, inds = tree.query(np.column_stack((xt, yt, zt)), k=10)
# NOTE(review): a query point that coincides with a sample gives d == 0, hence
# an infinite weight and a NaN result — confirm whether that can occur here.
w = 1.0 / d**2
air_idw = np.sum(w * air.flatten()[inds], axis=1) / np.sum(w, axis=1)
# Reshape to the target grid (reshape instead of assigning to .shape in place).
air_idw = air_idw.reshape(lon_curv.shape)
提前谢谢!

回答:这可能属于 KDTree 并非理想方案的情形之一。因为您要映射到的是网格,而网格是一种非常简单的结构,KDTree 的复杂性在这里带不来任何好处。最近的网格点及其距离用简单的算术运算即可求出。

下面是一个简单的示例实现。我使用的是高斯核,如果您愿意,可以直接把它改成 IDW。
import numpy as np
from scipy import stats
def rasterize(coords, feature, gu, cutoff, kernel=stats.norm(0, 2.5).pdf):
    """Accumulate kernel-weighted point features onto a regular 2-D raster.

    Each point contributes ``feature * kernel(distance)`` to every raster
    node that lies within ``cutoff`` of it; contributions landing on the
    same node are summed. Nodes with no point in range are 0.

    Parameters
    ----------
    coords : array_like, shape (N, 2)
        Point positions.
    feature : array_like, shape (N,)
        One value per point.
    gu : float
        Grid unit, i.e. the spacing between neighbouring raster nodes.
    cutoff : float
        Radius beyond which a point does not contribute to a node.
    kernel : callable, optional
        Maps an array of distances to an array of weights. Defaults to the
        pdf of a normal distribution with mean 0 and standard deviation 2.5.

    Returns
    -------
    numpy.ndarray
        Array of shape ``ceil((max - min) / gu) + 1`` along each axis,
        holding the summed weighted features. The raster is centred on the
        bounding box of ``coords``.

    Raises
    ------
    ValueError
        If ``coords`` is empty (raised by NumPy's min/max reduction).
    """
    coords = np.asarray(coords, dtype=float)
    feature = np.asarray(feature)
    n_points = len(coords)

    # Number of grid steps the cutoff radius can reach in each direction.
    ovlp = int(np.ceil(cutoff / gu))
    span = 2 * ovlp + 1

    # Raster dimensions; `base` is the position of node (0, 0), chosen so the
    # raster is centred on the bounding box of the points.
    mn, mx = coords.min(axis=0), coords.max(axis=0)
    reso = np.ceil((mx - mn) / gu).astype(int)
    base = (mx + mn - reso * gu) / 2

    # Snap every point to its nearest node; the residual is the vector from
    # that node to the point.
    grid_res = coords - base
    grid_coords = np.rint(grid_res / gu).astype(int)
    grid_res -= gu * grid_coords

    # Expand each point to the (span x span) block of nodes around its
    # nearest node. After the reshape the layout is (span, span, N), so the
    # point index is the fastest-varying one.
    gcovlp = np.c_[-ovlp:ovlp + 1, np.zeros(span, dtype=int)]
    grid_coords = (gcovlp[:, None, None, :] + gcovlp[None, :, None, ::-1]
                   + grid_coords).reshape(-1, 2)

    # Moving to a node offset by k steps changes the residual by -k * gu.
    gdovlp = -gu * gcovlp
    grid_res = (gdovlp[:, None, None, :] + gdovlp[None, :, None, ::-1]
                + grid_res).reshape(-1, 2)

    # Keep only nodes inside the raster and within the cutoff radius.
    dist_sq = np.einsum('ij,ij->i', grid_res, grid_res)
    in_fov = ((grid_coords >= 0) & (grid_coords <= reso)).all(axis=1)
    valid, = np.where(in_fov & (dist_sq <= cutoff * cutoff))

    # Recover the originating point of each surviving (node, point) pair:
    # with the (span, span, N) layout that is the flat index modulo N.
    weights = feature[valid % n_points] * kernel(np.sqrt(dist_sq[valid]))

    # Flatten node coordinates so bincount can sum contributions per node.
    grid_flat = np.ravel_multi_index(grid_coords[valid].T, reso + 1)
    return np.bincount(
        grid_flat, weights, (reso + 1).prod()).reshape(reso + 1)
# Demo: rasterize a synthetic point cloud, time the call and plot the result.
from timeit import timeit

import pylab

# Raster spacing and kernel cutoff radius passed to rasterize().
gu, cutoff = 5, 10

# 10,000 normally distributed points, stretched along the first axis and bent
# into a sine-shaped band along the second.
coords = np.random.randn(10_000, 2) * (100, 20)
coords[:, 1] = coords[:, 1] + 80 * np.sin(coords[:, 0] / 40)

# One uniformly distributed feature value per point.
feature = np.random.uniform(0, 1000, (10_000,))

# Total seconds for 100 runs, times 10, is the mean time per call in ms.
elapsed = timeit(
    "rasterize(coords, feature, gu, cutoff)", globals=globals(), number=100
)
print(elapsed * 10, 'ms')

pic = rasterize(coords, feature, gu, cutoff)

# Show the raster as a pseudocolor image with a colour scale.
pylab.pcolor(pic, cmap=pylab.cm.jet)
pylab.colorbar()
pylab.show()
将numpy导入为np
从scipy导入统计信息
def光栅化(坐标、特征、gu、截止、内核=stats.norm(0,2.5).pdf):
#计算重叠(过滤器大小/网格单位)
ovlp=int(np.ceil(截止值/gu))
#计算光栅尺寸
mn,mx=coords.min(轴=0),coords.max(轴=0)
reso=np.ceil((mx-mn)/gu.astype(int)
基数=(mx+mn-分辨率*gu)/2
#将坐标映射到光栅,剩余值为距离
网格=坐标-基准
grid\u coords=np.rint(grid\u res/gu).astype(int)
网格分辨率-=gu*网格坐标
#由于重叠,我们必须将相邻的栅格点添加到最近的栅格点
gcovlp=np.c.[-ovlp:ovlp+1,np.zero(2*ovlp+1,dtype=int)]
网格坐标=(gcovlp[:,无,无,:]+gcovlp[None,:,无,:-1]
+网格坐标)。重塑(-1,2)
#相应的残差具有相同的偏移量和相反的符号
gdovlp=-gu*(gcovlp+1/2)
grid_res=(gdovlp[:,无,无,:]+gdovlp[None,:,无,:-1]
+网格分辨率)。重塑(-1,2)
#放弃fov栅格点和截止点之外的点
有效,=np.其中((网格坐标>=0)和(grid_coords
哇!这样使用 numpy 函数真是一种有趣的方式。不幸的是,我的数据不是结构化的,所以我想我必须使用 KDTree,我会尽快发布我的解决方案。不过,我还有一批新数据是分布在一个矩阵中的,需要做同样的分析,因此我认为在那种情况下可以使用您的解决方案。我会试一试,并把结果留在评论里。