Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/r/80.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181

Warning: file_get_contents(/data/phpspider/zhask/data//catemap/4/jquery-ui/2.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
R 找到最高浓度的区域_R - Fatal编程技术网

R 找到最高浓度的区域

R 找到最高浓度的区域,r,R,我有一个大数据集(200万行),其中每行代表一个点,其空间坐标以米为单位(x和y)及其分数。看起来是这样的: my_points <- data.frame(ID = 1:2e6, x = sample(x = 1:1e6, size = 2e6, replace = TRUE), y = sample(x = 1:1e6, size = 2e6, replace = TRUE), Score = sample(x = 1:1e3, size = 2e6, r

我有一个大数据集(200万行),其中每行代表一个点,其空间坐标以米为单位(x和y)及其分数。看起来是这样的:

# Example data: two million points with integer coordinates in metres
# (x and y, each between 1 and 1e6) and an integer score between 1 and 1000.
my_points <- data.frame(
  ID    = seq_len(2e6),
  x     = sample.int(1e6, size = 2e6, replace = TRUE),
  y     = sample.int(1e6, size = 2e6, replace = TRUE),
  Score = sample.int(1e3, size = 2e6, replace = TRUE)
)

# First six rows; the values below are one sample draw.
head(my_points, n = 6L)
# ID      x      y Score
#  1  21984 628151    54
#  2 675714  27715   431
#  3 273248 127287    47
#  4 659750 795394   921
#  5 478142 417083   416
#  6 783249 440782   253

好的——我认为我的解决方案可行,但速度太慢了。

library(Rcpp)

sourceCpp(code = '
  #include <Rcpp.h>

  using namespace Rcpp;

  // determine, if a point is in a polygon
  bool pnp(NumericVector vertx, NumericVector verty, float testx, float testy) {

    int nvert = vertx.size();
    bool c = FALSE;
    int i, j = 0;

    for (i = 0, j = nvert-1; i < nvert; j = i++) {
      if ( ((verty[i]>testy) != (verty[j]>testy)) &&
           (testx < (vertx[j]-vertx[i]) * (testy-verty[i]) / (verty[j]-verty[i]) + vertx[i]) )
        c = !c;
    }

    return c;
  }

  // Build a regular polygon that approximates a circle.
  //
  // centerx, centery  coordinates of the circle centre
  // radius            circle radius; every corner lies exactly this far from
  //                   the centre
  // pnum              number of polygon corners (default 36, the value that
  //                   was previously hard-coded); should be at least 3 to
  //                   describe an area
  //
  // Returns a pnum x 2 matrix: column 0 holds the x and column 1 the y
  // coordinate of each corner, starting at angle 0 and advancing by
  // 2 * pi / pnum per row.
  //
  // Coordinates are taken as double so the centre is not narrowed to float,
  // and pi is computed instead of being typed in with five decimals.
  NumericMatrix circle(double centerx, double centery, double radius, int pnum = 36) {

    const double rotation = 2.0 * std::acos(-1.0) / pnum;

    // The row count follows pnum so both can never disagree.
    NumericMatrix res(pnum, 2);

    for (int p1 = 0; p1 < pnum; ++p1) {
      const double rot = p1 * rotation;
      res(p1, 0) = centerx + std::cos(rot) * radius;
      res(p1, 1) = centery + std::sin(rot) * radius;
    }

    return res;
  }

  // For every point, add up the scores of all points that fall inside the
  // circle polygon of radius 100 built around it. The inner loop does not
  // skip the centre point itself.
  //
  // points  data frame that as.matrix() turns into a numeric matrix with at
  //         least four columns; counted from zero, column 1 is read as x,
  //         column 2 as y and column 3 as the score
  //
  // Returns one score sum per input row, in row order.
  //
  // NOTE: every point is tested against every point, so the run time grows
  // with the square of the number of rows.
  // [[Rcpp::export]]
  NumericVector searchmaxclust(DataFrame points) {

    Function asMatrix("as.matrix");
    NumericMatrix pm = asMatrix(points);

    // Columns 1 to 3 are indexed below; fail clearly instead of reading
    // outside the matrix.
    if (pm.ncol() < 4) {
      stop("searchmaxclust() needs at least four columns: ID, x, y, Score");
    }

    const int n = pm.nrow();
    NumericVector co(n);

    for (int p1 = 0; p1 < n; p1++) {
      // Let the user abort a long run from the R console.
      checkUserInterrupt();

      // The polygon depends only on p1, so its corner vectors are extracted
      // once here instead of once per inner iteration.
      NumericMatrix circ = circle(pm(p1, 1), pm(p1, 2), 100.0);
      NumericVector circx = circ(_, 0);
      NumericVector circy = circ(_, 1);

      double total = 0.0;
      for (int p2 = 0; p2 < n; p2++) {
        // Matrix cells are read directly; no per-row copy is made.
        if (pnp(circx, circy, pm(p2, 1), pm(p2, 2))) {
          total += pm(p2, 3);
        }
      }
      co[p1] = total;
    }

    return co;
  }
')
库(Rcpp)
sourceCpp(代码=)
#包括
使用名称空间Rcpp;
//确定点是否位于多边形中
布尔pnp(数值向量顶点X、数值向量顶点Y、浮点测试X、浮点测试Y){
int nvert=vertx.size();
boolc=假;
int i,j=0;
对于(i=0,j=nvert-1;itesty)!=(verty[j]>testy))&&
(testx<(vertx[j]-vertx[i])*(testy-verty[i])/(verty[j]-verty[i])+vertx[i]))
c=!c;
}
返回c;
}
//围绕具有特定半径的点创建圆形多边形(36个角)
数字矩阵圆(浮动中心X、浮动中心Y、浮动半径){
int pnum=36;
双旋转=2*3.14159/pnum;
数字矩阵res(36,2);
对于(int p1=0;p1
我使用Rcpp来加快速度——算法非常简单

  • 围绕每个点创建一个圆形多边形
  • 检查其他所有点是否位于该圆形多边形内,并将位于其内的点的分数全部相加

  • 处理1000个点大约需要0.6秒。由于耗时随点数的平方增长,我估计你的200万个点大约需要一个月的时间。嗯,我还是决定把它发出来,也许能帮到别人。

    谢谢你,@nevrome!不过,你的解决方案只把初始数据中的点当作圆心来测试,而得分总和最高的区域,其中心并不一定是我初始数据中的某个点……哦,好的。另一个想法是:也许可以使用 raster 包做图像分析,它已经为栅格数据实现了非常快速的算法。