C# 优化在值列表中查找最接近的值
我知道已经有人问过类似的问题,但我看到的那些问题处理的数据都不超过数万行。那些“大”数据集上的代码虽然相当慢,但规模相对来说微不足道。在我的例子中,我要处理数百万到数亿行,所以即使是很小的优化也可能非常有用。
到目前为止,我一直依赖此前类似问题中的答案,但我做出的改进并不显著。
为了全面了解:我有一个大约 2,000,000 行的参考文件(格式为 easting northing value),需要从中查找与 ProcessedLineData 最接近的值。(我不能保证列表中存在精确匹配,因此需要对整个列表中的每个点计算距离,以找到最接近的可行点。)
标签:c#, list, linq, numbers
我的第一个选择非常缓慢,那就是:
// Sort every reference sample by its distance to the processed point and take the first one.
AsciiFile.Sample _closestSample =
    (from sample in Modifications.Value_and_Ref_Calc.ReferenceFile.Dataset[0].Data
     orderby Public.CalculateDistance(sample.Easting, sample.Northing, ProcessedLineData.Easting, ProcessedLineData.Northing)
     select sample)
    .First();
在此之后,我认为第二个示例的Aggregate
效果会更好,因此我选择了这个:
// Fold over the reference samples, keeping whichever of the pair is strictly closer
// to the processed point (on a tie the later sample wins).
AsciiFile.Sample _closestSample = Modifications.Value_and_Ref_Calc.ReferenceFile.Dataset[0].Data
    .Aggregate((best, next) => {
        var bestDistance = Public.CalculateDistance(best.Easting, best.Northing, ProcessedLineData.Easting, ProcessedLineData.Northing);
        var nextDistance = Public.CalculateDistance(next.Easting, next.Northing, ProcessedLineData.Easting, ProcessedLineData.Northing);
        return bestDistance < nextDistance ? best : next;
    });
在这些示例中,Modifications.Value_and_Ref_Calc.ReferenceFile.Dataset[0].Data 是我的 2,000,000 个参考点列表。
如果不是要为数亿个 ProcessedLineData 点查找最接近的值(小文件有数万个点,大文件有数千万个点),这就不那么重要了。
最终目的:我查找最接近的值,是为了取得 Modifications.Value_and_Ref_Calc.ReferenceFile.Dataset[0].Data 中与该点相关联的高程,并用它修改我的 ProcessedLineData 中的值。整个流程中最慢的部分肯定是搜索最接近的值。
是否有我遗漏的、明显可以优化这段代码的方法?
回答:在不了解值的范围和精度、也不对查找次数与参考点列表变化的分布做过多假设的情况下,用 for 循环做一些简单的优化,在 100 次查找上可以比较快的 OrderBy/First 代码快大约 30 倍。
代码:
用 pld 表示您的 ProcessedLineData,用 data 表示 Modifications.Value_and_Ref_Calc.ReferenceFile.Dataset[0].Data,您可以得到:
// Linear scan for the sample nearest to pld, comparing squared distances
// (no square root is taken). The first sample seeds the running best.
var _closestSample = data[0];
var seedDx = _closestSample.Easting - pld.Easting;
var seedDy = _closestSample.Northing - pld.Northing;
var dist = seedDx * seedDx + seedDy * seedDy;
for (int idx = 1; idx < data.Count; ++idx) {
    var candidate = data[idx];
    var dx = candidate.Easting - pld.Easting;
    var dy = candidate.Northing - pld.Northing;
    var candidateDist = dx * dx + dy * dy;
    // Only a strictly smaller distance replaces the current best.
    if (!(candidateDist < dist)) {
        continue;
    }
    dist = candidateDist;
    _closestSample = candidate;
}
然后是一个实现该接口的具体类(请替换为您的真实类):
以及一个类,用于使用希尔伯特曲线将 IEnumerable<ICoordinate> 转换为空间索引集合:
/// <summary>
/// Nearest-point lookup over a set of <see cref="ICoordinate"/> values that are bucketed
/// and sorted by the key returned from <c>HilbertIndex()</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the search window is derived from the keys of two corner points. Whether
/// that window always contains the true nearest point depends on how <c>HilbertIndex()</c>,
/// <c>Minus</c> and <c>Plus</c> are defined elsewhere; confirm before relying on exactness.
/// </remarks>
public class SpatialIndex {
    // Buckets of points that share one key, ordered by key.
    SortedList<ulong, List<ICoordinate>> orderedData;
    // Copy of the sorted keys so they can be binary-searched.
    List<ulong> orderedIndexes;

    /// <summary>Groups <paramref name="data"/> by key and stores the groups in key order.</summary>
    /// <param name="data">Points to index.</param>
    public SpatialIndex(IEnumerable<ICoordinate> data) {
        orderedData = data.GroupBy(d => d.HilbertIndex()).ToSortedList(g => g.Key, g => g.ToList());
        orderedIndexes = orderedData.Keys.ToList();
    }

    /// <summary>
    /// Returns the indexed point with the smallest squared coordinate distance to
    /// <paramref name="aPoint"/> among the candidate buckets that are examined.
    /// </summary>
    /// <param name="aPoint">Query point.</param>
    /// <exception cref="System.InvalidOperationException">The index holds no points.</exception>
    public ICoordinate FindNearest(ICoordinate aPoint) {
        // Without this guard an empty index fails later with an index-out-of-range error.
        if (orderedIndexes.Count == 0) {
            throw new System.InvalidOperationException("The spatial index contains no points.");
        }

        // Initial guess: the bucket at (or just above) the query key, and the one below it.
        var nearestIndex = orderedIndexes.FindNearestIndex(aPoint.HilbertIndex());
        var nearestGuess = orderedData.Values[nearestIndex][0];
        var guessDist = DistanceSquared(nearestGuess, aPoint);
        if (nearestIndex > 0) {
            var tryGuess = orderedData.Values[nearestIndex - 1][0];
            var tryDist = DistanceSquared(tryGuess, aPoint);
            if (tryDist < guessDist) {
                nearestGuess = tryGuess;
                guessDist = tryDist;
            }
        }

        // guessDist is a SQUARED distance; the half-width of the search box must be the
        // distance itself, otherwise the window is too small for distances below 1 and
        // needlessly large for distances above 1.
        var offset = System.Math.Sqrt(guessDist);
        var offsetPOI = new PointOfInterest(offset, offset);
        var minhii = orderedIndexes.FindNearestIndex(aPoint.Minus(offsetPOI).HilbertIndex());
        var maxhii = orderedIndexes.FindNearestIndex(aPoint.Plus(offsetPOI).HilbertIndex());
        // The two corner keys are not assumed to be ordered; normalise before scanning.
        if (minhii > maxhii) {
            var swap = minhii;
            minhii = maxhii;
            maxhii = swap;
        }
        if (minhii > 0) {
            --minhii;
        }

        // Inclusive upper bound: FindNearestIndex never returns more than Count - 1, and
        // the bucket it points at is itself a candidate.
        for (int j2 = minhii; j2 <= maxhii; ++j2) {
            var tryList = orderedData.Values[j2];
            for (int j3 = 0; j3 < tryList.Count; ++j3) {
                var y = tryList[j3];
                var ydist = DistanceSquared(y, aPoint);
                if (ydist < guessDist) {
                    nearestGuess = y;
                    guessDist = ydist;
                }
            }
        }
        return nearestGuess;
    }

    // Squared distance between two points in Longitude/Latitude space.
    // NOTE(review): assumes Longitude and Latitude are double — confirm against ICoordinate.
    static double DistanceSquared(ICoordinate a, ICoordinate b) {
        var dLon = a.Longitude - b.Longitude;
        var dLat = a.Latitude - b.Latitude;
        return dLon * dLon + dLat * dLat;
    }
}
更新:我修改了“查找最近点”算法,以获取索引上目标点上方和下方的最近点,而不是仅尝试上面的一个。这提供了另一个不错的加速。您可以尝试通过使用网格将平面划分为相等的正方形来缩小每次搜索的范围。每个元素将存储到一个合适的正方形的桶。然后,您可以使用此网格执行搜索,从包含搜索点的正方形开始,向外螺旋,直到在螺旋的周长中找到一个或多个填充的正方形。这是一个简单的算法,但在特定条件下可以表现得出奇地好:
/// <summary>
/// A grid-bucketed collection: every item is stored in the bucket of the square that
/// contains its location, and <see cref="FindClosest"/> searches outward ring by ring
/// from the square containing the query point.
/// </summary>
/// <typeparam name="T">Item type; its location is obtained through the selector passed to the constructor.</typeparam>
public class SpatialDictionary<T> : IEnumerable<T>
{
    private readonly Dictionary<(int, int), List<T>> _dictionary;
    private readonly double _squareSize;
    private readonly Func<T, (double, double)> _locationSelector;
    private int _count;

    /// <summary>Number of items added so far.</summary>
    public int Count => _count;

    /// <summary>Creates an empty collection.</summary>
    /// <param name="squareSize">Side length of a grid square; must be positive.</param>
    /// <param name="locationSelector">Returns the (x, y) location of an item.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="squareSize"/> is not positive.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="locationSelector"/> is null.</exception>
    public SpatialDictionary(
        double squareSize, Func<T, (double, double)> locationSelector)
    {
        if (squareSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(squareSize));
        }
        _squareSize = squareSize;
        _locationSelector = locationSelector
            ?? throw new ArgumentNullException(nameof(locationSelector));
        _dictionary = new Dictionary<(int, int), List<T>>();
    }

    /// <summary>Adds <paramref name="item"/> to the bucket of the square containing its location.</summary>
    /// <exception cref="OverflowException">The grid key of the location does not fit in an int.</exception>
    public void Add(T item)
    {
        var (itemX, itemY) = _locationSelector(item);
        var key = (ToKey(itemX), ToKey(itemY));
        if (!_dictionary.TryGetValue(key, out var bucket))
        {
            bucket = new List<T>(1);
            _dictionary.Add(key, bucket);
        }
        bucket.Add(item);
        _count++;
    }

    /// <summary>Returns the stored item whose location is nearest to (<paramref name="x"/>, <paramref name="y"/>).</summary>
    /// <exception cref="InvalidOperationException">The collection is empty.</exception>
    public T FindClosest(double x, double y)
    {
        if (_count == 0)
        {
            throw new InvalidOperationException();
        }
        int keyX = ToKey(x);
        int keyY = ToKey(y);
        double minDistance = double.PositiveInfinity; // squared distance of the best item so far
        T minItem = default;
        for (int radius = 1; ; radius = checked(radius + 1))
        {
            // A ring of this radius has 8 * radius squares. Once that exceeds the number
            // of populated buckets, scanning the buckets directly is cheaper, is exact,
            // and bounds the search when the data lies far from the query point.
            if (8L * radius > _dictionary.Count)
            {
                foreach (var bucket in _dictionary.Values)
                {
                    ScanBucket(bucket, x, y, ref minDistance, ref minItem);
                }
                return minItem;
            }

            foreach (var square in GetSquares(keyX, keyY, radius))
            {
                if (_dictionary.TryGetValue(square, out var bucket))
                {
                    ScanBucket(bucket, x, y, ref minDistance, ref minItem);
                }
            }

            // Every square within `radius` squares of the query square has now been
            // searched, so any unsearched item is at least radius * squareSize away.
            // Stop only when the best hit is no farther than that; returning on the first
            // non-empty ring can miss a nearer item that sits in the next ring.
            double covered = radius * _squareSize;
            if (minDistance <= covered * covered)
            {
                return minItem;
            }
        }
    }

    // Grid key of a coordinate. Floor (not truncation) keeps every square the same width,
    // including the ones on either side of zero.
    private int ToKey(double coordinate)
        => checked((int)Math.Floor(coordinate / _squareSize));

    // Updates the running best with any item in the bucket that is strictly closer to (x, y).
    private void ScanBucket(List<T> bucket, double x, double y, ref double minDistance, ref T minItem)
    {
        foreach (var item in bucket)
        {
            var (itemX, itemY) = _locationSelector(item);
            var distX = x - itemX;
            var distY = y - itemY;
            var distance = distX * distX + distY * distY;
            if (distance < minDistance)
            {
                minDistance = distance;
                minItem = item;
            }
        }
    }

    // Yields the squares on the ring `radius` squares away from (x, y); for radius 1 the
    // centre square itself is yielded first.
    private IEnumerable<(int, int)> GetSquares(int x, int y, int radius)
    {
        if (radius == 1)
        {
            yield return (x, y);
        }
        for (int i = -radius; i < radius; i++)
        {
            yield return checked((x + i, y + radius));
            yield return checked((x - i, y - radius));
            yield return checked((x + radius, y - i));
            yield return checked((x - radius, y + i));
        }
    }

    /// <summary>Enumerates all stored items, bucket by bucket.</summary>
    public IEnumerator<T> GetEnumerator()
        => _dictionary.Values.SelectMany(b => b).GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
公共类空间索引:IEnumerable
{
私人只读词典;
私有只读双平方码;
私有只读函数位置选择器;
私人国际单位计数;
公共整数计数=>\u计数;
公共空间小说(
双平方码,函数位置选择器)
{
if(squareSize _dictionary.Values.SelectMany(b=>b).GetEnumerator();
IEnumerator IEnumerable.GetEnumerator()=>GetEnumerator();
}
用法示例:
// Index every row by its (Easting, Northing) columns on a grid of squares of side 10.0,
// then ask for the row closest to (100.0, 100.0).
var spatialData = new SpatialDictionary<DataRow>(
    squareSize: 10.0,
    locationSelector: r => (r.Field<double>("Easting"), r.Field<double>("Northing")));
foreach (DataRow row in dataTable.Rows)
{
    spatialData.Add(row);
}
DataRow result = spatialData.FindClosest(x: 100.0, y: 100.0);
var spatialData=新的空间索引(squareSize:10.0,
dr=>(dr.Field(“东”),dr.Field(“北”);
foreach(dataTable.Rows中的DataRow-DataRow)
{
添加(数据行);
}
DataRow结果=spatialData.FindClosest(100.0100.0);
评论:
– 不依赖 LINQ,自己写一个简单的循环来比较值,怎么样?LINQ 会引入可能导致性能下降的开销。
– @BiesiGrr:这样一来,一旦某个点进入距离我的点“可接受”的范围,我就可以跳出循环了?
– 不一定,它只是减少了开销。而且,对于同一个样本,你多次计算了距离。
– 因为距离函数中平方根的参数始终是非负数,而平方根在该范围内是单调的,所以在寻找最小距离时不需要计算平方根。
– 我更新了我的答案,这可能是进行大量查找的最佳解决方案。
/// <summary>
/// Nearest-point lookup over a set of <see cref="ICoordinate"/> values that are bucketed
/// and sorted by the key returned from <c>HilbertIndex()</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the search window is derived from the keys of two corner points. Whether
/// that window always contains the true nearest point depends on how <c>HilbertIndex()</c>,
/// <c>Minus</c> and <c>Plus</c> are defined elsewhere; confirm before relying on exactness.
/// </remarks>
public class SpatialIndex {
    // Buckets of points that share one key, ordered by key.
    SortedList<ulong, List<ICoordinate>> orderedData;
    // Copy of the sorted keys so they can be binary-searched.
    List<ulong> orderedIndexes;

    /// <summary>Groups <paramref name="data"/> by key and stores the groups in key order.</summary>
    /// <param name="data">Points to index.</param>
    public SpatialIndex(IEnumerable<ICoordinate> data) {
        orderedData = data.GroupBy(d => d.HilbertIndex()).ToSortedList(g => g.Key, g => g.ToList());
        orderedIndexes = orderedData.Keys.ToList();
    }

    /// <summary>
    /// Returns the indexed point with the smallest squared coordinate distance to
    /// <paramref name="aPoint"/> among the candidate buckets that are examined.
    /// </summary>
    /// <param name="aPoint">Query point.</param>
    /// <exception cref="System.InvalidOperationException">The index holds no points.</exception>
    public ICoordinate FindNearest(ICoordinate aPoint) {
        // Without this guard an empty index fails later with an index-out-of-range error.
        if (orderedIndexes.Count == 0) {
            throw new System.InvalidOperationException("The spatial index contains no points.");
        }

        // Initial guess: the bucket at (or just above) the query key, and the one below it.
        var nearestIndex = orderedIndexes.FindNearestIndex(aPoint.HilbertIndex());
        var nearestGuess = orderedData.Values[nearestIndex][0];
        var guessDist = DistanceSquared(nearestGuess, aPoint);
        if (nearestIndex > 0) {
            var tryGuess = orderedData.Values[nearestIndex - 1][0];
            var tryDist = DistanceSquared(tryGuess, aPoint);
            if (tryDist < guessDist) {
                nearestGuess = tryGuess;
                guessDist = tryDist;
            }
        }

        // guessDist is a SQUARED distance; the half-width of the search box must be the
        // distance itself, otherwise the window is too small for distances below 1 and
        // needlessly large for distances above 1.
        var offset = System.Math.Sqrt(guessDist);
        var offsetPOI = new PointOfInterest(offset, offset);
        var minhii = orderedIndexes.FindNearestIndex(aPoint.Minus(offsetPOI).HilbertIndex());
        var maxhii = orderedIndexes.FindNearestIndex(aPoint.Plus(offsetPOI).HilbertIndex());
        // The two corner keys are not assumed to be ordered; normalise before scanning.
        if (minhii > maxhii) {
            var swap = minhii;
            minhii = maxhii;
            maxhii = swap;
        }
        if (minhii > 0) {
            --minhii;
        }

        // Inclusive upper bound: FindNearestIndex never returns more than Count - 1, and
        // the bucket it points at is itself a candidate.
        for (int j2 = minhii; j2 <= maxhii; ++j2) {
            var tryList = orderedData.Values[j2];
            for (int j3 = 0; j3 < tryList.Count; ++j3) {
                var y = tryList[j3];
                var ydist = DistanceSquared(y, aPoint);
                if (ydist < guessDist) {
                    nearestGuess = y;
                    guessDist = ydist;
                }
            }
        }
        return nearestGuess;
    }

    // Squared distance between two points in Longitude/Latitude space.
    // NOTE(review): assumes Longitude and Latitude are double — confirm against ICoordinate.
    static double DistanceSquared(ICoordinate a, ICoordinate b) {
        var dLon = a.Longitude - b.Longitude;
        var dLat = a.Latitude - b.Latitude;
        return dLon * dLon + dLat * dLat;
    }
}
/// <summary>
/// Nearest-point lookup over a set of <see cref="ICoordinate"/> values that are bucketed
/// and sorted by the key returned from <c>MortonCode()</c>.
/// </summary>
/// <remarks>
/// NOTE(review): the search window is derived from the keys of two corner points. Whether
/// that window always contains the true nearest point depends on how <c>MortonCode()</c>,
/// <c>Minus</c> and <c>Plus</c> are defined elsewhere; confirm before relying on exactness.
/// </remarks>
public class SpatialIndexMorton {
    // Buckets of points that share one key, ordered by key.
    SortedList<ulong, List<ICoordinate>> orderedData;
    // Copy of the sorted keys so they can be binary-searched.
    List<ulong> orderedIndexes;

    /// <summary>Groups <paramref name="data"/> by key and stores the groups in key order.</summary>
    /// <param name="data">Points to index.</param>
    public SpatialIndexMorton(IEnumerable<ICoordinate> data) {
        orderedData = data.GroupBy(d => d.MortonCode()).ToSortedList(g => g.Key, g => g.ToList());
        orderedIndexes = orderedData.Keys.ToList();
    }

    /// <summary>
    /// Returns the indexed point with the smallest squared coordinate distance to
    /// <paramref name="aPoint"/> among the candidate buckets that are examined.
    /// </summary>
    /// <param name="aPoint">Query point.</param>
    /// <exception cref="System.InvalidOperationException">The index holds no points.</exception>
    public ICoordinate FindNearest(ICoordinate aPoint) {
        // Without this guard an empty index fails later with an index-out-of-range error.
        if (orderedIndexes.Count == 0) {
            throw new System.InvalidOperationException("The spatial index contains no points.");
        }

        // Initial guess: the bucket at (or just above) the query key, and the one below it.
        var nearestIndex = orderedIndexes.FindNearestIndex(aPoint.MortonCode());
        var nearestGuess = orderedData.Values[nearestIndex][0];
        var guessDist = DistanceSquared(nearestGuess, aPoint);
        if (nearestIndex > 0) {
            var tryGuess = orderedData.Values[nearestIndex - 1][0];
            var tryDist = DistanceSquared(tryGuess, aPoint);
            if (tryDist < guessDist) {
                nearestGuess = tryGuess;
                guessDist = tryDist;
            }
        }

        // guessDist is a SQUARED distance; the half-width of the search box must be the
        // distance itself, otherwise the window is too small for distances below 1 and
        // needlessly large for distances above 1.
        var offset = System.Math.Sqrt(guessDist);
        var offsetPOI = new PointOfInterest(offset, offset);
        var minmci = orderedIndexes.FindNearestIndex(aPoint.Minus(offsetPOI).MortonCode());
        var maxmci = orderedIndexes.FindNearestIndex(aPoint.Plus(offsetPOI).MortonCode());
        // The two corner keys are not assumed to be ordered; normalise before scanning.
        if (minmci > maxmci) {
            var swap = minmci;
            minmci = maxmci;
            maxmci = swap;
        }
        if (minmci > 0) {
            --minmci;
        }

        // Inclusive upper bound: FindNearestIndex never returns more than Count - 1, and
        // the bucket it points at is itself a candidate.
        for (int j2 = minmci; j2 <= maxmci; ++j2) {
            var tryList = orderedData.Values[j2];
            for (int j3 = 0; j3 < tryList.Count; ++j3) {
                var y = tryList[j3];
                var ydist = DistanceSquared(y, aPoint);
                if (ydist < guessDist) {
                    nearestGuess = y;
                    guessDist = ydist;
                }
            }
        }
        return nearestGuess;
    }

    // Squared distance between two points in Longitude/Latitude space.
    // NOTE(review): assumes Longitude and Latitude are double — confirm against ICoordinate.
    static double DistanceSquared(ICoordinate a, ICoordinate b) {
        var dLon = a.Longitude - b.Longitude;
        var dLat = a.Latitude - b.Latitude;
        return dLon * dLon + dLat * dLat;
    }
}
/// <summary>Binary-search helpers for sorted lists.</summary>
public static class ListExt {
    /// <summary>
    /// Binary-searches the sorted list <paramref name="l"/> for <paramref name="possibleKey"/>.
    /// Returns the index of a matching element when there is one; otherwise the index of the
    /// first larger element, or the last index when every element is smaller.
    /// For an empty list the result is -1.
    /// </summary>
    public static int FindNearestIndex<T>(this List<T> l, T possibleKey) {
        int found = l.BinarySearch(possibleKey);
        if (found >= 0) {
            return found;
        }
        // A negative result is the bitwise complement of the insertion point.
        int insertionPoint = ~found;
        return insertionPoint == l.Count ? l.Count - 1 : insertionPoint;
    }
}
/// <summary>Conversion helpers for sequences.</summary>
public static class IEnumerableExt {
    /// <summary>
    /// Projects each element of <paramref name="src"/> to a key/value pair and returns the
    /// pairs as a <see cref="SortedList{TKey, TValue}"/>. The pairs are first collected with
    /// <c>ToDictionary</c> and the dictionary is then handed to the sorted list's constructor.
    /// </summary>
    public static SortedList<TKey, TValue> ToSortedList<T, TKey, TValue>(this IEnumerable<T> src, Func<T, TKey> keySelector, Func<T, TValue> valueSelector) {
        var pairs = src.ToDictionary(keySelector, valueSelector);
        return new SortedList<TKey, TValue>(pairs);
    }
}
// Build the index once, then pair every lookup point with the point the index returns for it.
var hilbertIndex = new SpatialIndex(data);
var ans = new (ICoordinate, ICoordinate)[lookups];
for (int slot = 0; slot < lookups; ++slot) {
    ICoordinate query = plds[slot];
    ICoordinate nearest = hilbertIndex.FindNearest(query);
    ans[slot] = (query, nearest);
}
/// <summary>
/// A grid-bucketed collection: every item is stored in the bucket of the square that
/// contains its location, and <see cref="FindClosest"/> searches outward ring by ring
/// from the square containing the query point.
/// </summary>
/// <typeparam name="T">Item type; its location is obtained through the selector passed to the constructor.</typeparam>
public class SpatialDictionary<T> : IEnumerable<T>
{
    private readonly Dictionary<(int, int), List<T>> _dictionary;
    private readonly double _squareSize;
    private readonly Func<T, (double, double)> _locationSelector;
    private int _count;

    /// <summary>Number of items added so far.</summary>
    public int Count => _count;

    /// <summary>Creates an empty collection.</summary>
    /// <param name="squareSize">Side length of a grid square; must be positive.</param>
    /// <param name="locationSelector">Returns the (x, y) location of an item.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="squareSize"/> is not positive.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="locationSelector"/> is null.</exception>
    public SpatialDictionary(
        double squareSize, Func<T, (double, double)> locationSelector)
    {
        if (squareSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(squareSize));
        }
        _squareSize = squareSize;
        _locationSelector = locationSelector
            ?? throw new ArgumentNullException(nameof(locationSelector));
        _dictionary = new Dictionary<(int, int), List<T>>();
    }

    /// <summary>Adds <paramref name="item"/> to the bucket of the square containing its location.</summary>
    /// <exception cref="OverflowException">The grid key of the location does not fit in an int.</exception>
    public void Add(T item)
    {
        var (itemX, itemY) = _locationSelector(item);
        var key = (ToKey(itemX), ToKey(itemY));
        if (!_dictionary.TryGetValue(key, out var bucket))
        {
            bucket = new List<T>(1);
            _dictionary.Add(key, bucket);
        }
        bucket.Add(item);
        _count++;
    }

    /// <summary>Returns the stored item whose location is nearest to (<paramref name="x"/>, <paramref name="y"/>).</summary>
    /// <exception cref="InvalidOperationException">The collection is empty.</exception>
    public T FindClosest(double x, double y)
    {
        if (_count == 0)
        {
            throw new InvalidOperationException();
        }
        int keyX = ToKey(x);
        int keyY = ToKey(y);
        double minDistance = double.PositiveInfinity; // squared distance of the best item so far
        T minItem = default;
        for (int radius = 1; ; radius = checked(radius + 1))
        {
            // A ring of this radius has 8 * radius squares. Once that exceeds the number
            // of populated buckets, scanning the buckets directly is cheaper, is exact,
            // and bounds the search when the data lies far from the query point.
            if (8L * radius > _dictionary.Count)
            {
                foreach (var bucket in _dictionary.Values)
                {
                    ScanBucket(bucket, x, y, ref minDistance, ref minItem);
                }
                return minItem;
            }

            foreach (var square in GetSquares(keyX, keyY, radius))
            {
                if (_dictionary.TryGetValue(square, out var bucket))
                {
                    ScanBucket(bucket, x, y, ref minDistance, ref minItem);
                }
            }

            // Every square within `radius` squares of the query square has now been
            // searched, so any unsearched item is at least radius * squareSize away.
            // Stop only when the best hit is no farther than that; returning on the first
            // non-empty ring can miss a nearer item that sits in the next ring.
            double covered = radius * _squareSize;
            if (minDistance <= covered * covered)
            {
                return minItem;
            }
        }
    }

    // Grid key of a coordinate. Floor (not truncation) keeps every square the same width,
    // including the ones on either side of zero.
    private int ToKey(double coordinate)
        => checked((int)Math.Floor(coordinate / _squareSize));

    // Updates the running best with any item in the bucket that is strictly closer to (x, y).
    private void ScanBucket(List<T> bucket, double x, double y, ref double minDistance, ref T minItem)
    {
        foreach (var item in bucket)
        {
            var (itemX, itemY) = _locationSelector(item);
            var distX = x - itemX;
            var distY = y - itemY;
            var distance = distX * distX + distY * distY;
            if (distance < minDistance)
            {
                minDistance = distance;
                minItem = item;
            }
        }
    }

    // Yields the squares on the ring `radius` squares away from (x, y); for radius 1 the
    // centre square itself is yielded first.
    private IEnumerable<(int, int)> GetSquares(int x, int y, int radius)
    {
        if (radius == 1)
        {
            yield return (x, y);
        }
        for (int i = -radius; i < radius; i++)
        {
            yield return checked((x + i, y + radius));
            yield return checked((x - i, y - radius));
            yield return checked((x + radius, y - i));
            yield return checked((x - radius, y + i));
        }
    }

    /// <summary>Enumerates all stored items, bucket by bucket.</summary>
    public IEnumerator<T> GetEnumerator()
        => _dictionary.Values.SelectMany(b => b).GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
// Index every row by its (Easting, Northing) columns on a grid of squares of side 10.0,
// then ask for the row closest to (100.0, 100.0).
var spatialData = new SpatialDictionary<DataRow>(
    squareSize: 10.0,
    locationSelector: r => (r.Field<double>("Easting"), r.Field<double>("Northing")));
foreach (DataRow row in dataTable.Rows)
{
    spatialData.Add(row);
}
DataRow result = spatialData.FindClosest(x: 100.0, y: 100.0);