Java DBSCAN群集算法工作不正常。我做错了什么？_Java_Data Mining_Cluster Analysis_Dbscan

Java DBSCAN群集算法工作不正常。我做错了什么？

java

Java DBSCAN群集算法工作不正常。我做错了什么？,java,data-mining,cluster-analysis,dbscan,Java,Data Mining,Cluster Analysis,Dbscan,我正试图编写DBSCAN算法来对一组点进行聚类，但结果非常糟糕。这可能是因为数据，但不仅仅如此。我得到了不应该发生的大小

我正试图编写DBSCAN算法来对一组点进行聚类，但结果非常糟糕。这可能是因为数据，但不仅仅如此。我得到了大小小于minPts的簇，这本不应该发生。我做错了什么？我已经检查了很多次代码，但我不知道问题出在哪里

我参考了维基百科上给出的算法

private static int[]dbScan（字符串[]点，intε，int minPts）{
int集群=0；
//如果已访问该点，则访问了门店
boolean[]访问=新的boolean[points.length]；
//点群集存储点已分配给的群集
int[]pointsCluster=新int[points.length]；
对于（int iii=0；iiiminPts
*/
私有静态void expandCluster（字符串[]点、int种子点、哈希集邻居、，
int[]点群集、访问的布尔[]、int群集、intε、int minPts）{
pointsCluster[seedPoint]=cluster；//将群集分配给种子点
//创建队列以处理邻居
队列种子=新的LinkedList（）；
种子。添加所有（邻居）；
而（！seeds.isEmpty（））{
int currentPoint=（整数）seeds.poll（）；
如果（！已访问[currentPoint]）{
已访问[currentPoint]=true；//将邻居标记为已访问
//获取此点的邻居
HashSet currentNeighbors=ε（点，currentPoint，ε）；
//如果currentPoint在邻域中有>=minPts，则将这些点添加到队列中
if（currentNeights.size（）>=minPts）{
种子。添加所有（当前邻居）；
}
}
//如果currentPoint尚未分配群集，请将其分配给当前群集
如果（pointsCluster[currentPoint]=0）pointsCluster[currentPoint]=cluster；
}
}
/*
*返回一个哈希集，其中包含以下点的索引：
*在索引==当前点处点的ε邻域中
*/
私有静态哈希集epsiloneighbors（字符串[]点，int currentPoint，int epsilon）{
HashSet邻居=新HashSet（）；
字符串蛋白质=点[当前点]；
对于（int iii=0；iii=ε）相邻。添加（iii）；
}
回归邻居；
}

当结果不好时，可能是因为数据不好（对于基于密度的聚类），或者是因为参数不好

事实上，如果簇彼此接触，DBSCAN可以产生大小小于minPts的簇。此时它们可以互相“窃取”边界点

不妨使用例如ELKI来验证你的算法输出？

也请考虑阅读原始论文，而不是维基百科！哇，你说得对。我没有想到簇会“窃取”边界点。谢谢。所以，从表面上看，算法看起来没问题，对吧？我没有详细检查。您的

epsiloneighbors

引用未定义的变量

jjj

。还要注意的是，Java集合对基元类型的性能非常差。你可能真的想试试ELKI，因为它真的很快。是的，

jjj

应该是

currentPoint

。我会研究一下ELKI。谢谢你的帮助。

/*
 * Runs the DBSCAN pass over every point and returns one label per point.
 *
 * points   the items to cluster
 * epsilon  threshold handed to epsilonNeighbors when collecting a neighborhood
 * minPts   minimum neighborhood size required to start a new cluster
 *
 * Returns an array parallel to points. A point whose own neighborhood holds
 * fewer than minPts entries when it is first reached is labelled -1 (noise);
 * cluster numbers start at 1 and are written by expandCluster.
 */
private static int[] dbScan(String[] points, int epsilon, int minPts) {
    final int total = points.length;
    // seen[i] is true once point i has had its neighborhood examined
    boolean[] seen = new boolean[total];
    // labels[i] is the cluster number recorded for point i
    int[] labels = new int[total];
    int clusterCount = 0;

    for (int idx = 0; idx < total; idx++) {
        if (!seen[idx]) {
            seen[idx] = true;
            HashSet<Integer> nearby = epsilonNeighbors(points, idx, epsilon);
            if (nearby.size() >= minPts) {
                // dense enough: open the next cluster and grow it from this point
                clusterCount++;
                expandCluster(points, idx, nearby, labels, seen, clusterCount, epsilon, minPts);
            } else {
                // too sparse to seed a cluster: label as noise
                labels[idx] = -1;
            }
        }
    }
    return labels;
}

/*
 * Grows a cluster outward from a seed point whose epsilon neighborhood
 * holds at least minPts points.
 *
 * points         the items being clustered
 * seedPoint      index of the point that started this cluster
 * neighbors      indexes in the epsilon neighborhood of seedPoint
 * pointsCluster  per-point cluster labels, updated in place
 *                (0 = unassigned, -1 = noise, > 0 = cluster number)
 * visited        per-point flags, updated in place
 * cluster        number of the cluster being grown
 * epsilon        threshold handed to epsilonNeighbors
 * minPts         minimum neighborhood size for a point to keep expanding the cluster
 */
private static void expandCluster(String[] points, int seedPoint, HashSet<Integer> neighbors,
        int[] pointsCluster, boolean[] visited, int cluster, int epsilon, int minPts) {

    pointsCluster[seedPoint] = cluster;     // assign cluster to seed point
    // queue of points still to be processed, starting with the seed's neighborhood
    Queue<Integer> seeds = new LinkedList<Integer>(neighbors);
    while (!seeds.isEmpty()) {
        int currentPoint = seeds.poll();
        if (!visited[currentPoint]) {
            visited[currentPoint] = true;       // mark neighbor as visited
            // get neighbors of this currentPoint
            HashSet<Integer> currentNeighbors = epsilonNeighbors(points, currentPoint, epsilon);
            // if currentPoint has >= minPts in neighborhood, add those points to the queue
            if (currentNeighbors.size() >= minPts) {
                seeds.addAll(currentNeighbors);
            }
        }
        // Claim the point if it belongs to no cluster yet. The test is <= 0 rather
        // than == 0 so that a point labelled noise (-1) earlier in the scan is
        // absorbed as a border point instead of being left out, which would
        // otherwise yield clusters with fewer than minPts members.
        if (pointsCluster[currentPoint] <= 0) {
            pointsCluster[currentPoint] = cluster;
        }
    }
}

/*
 * Returns a HashSet containing the indexes of points which are
 * in the epsilon neighborhood of the point at index == currentPoint,
 * i.e. every index whose similarity score against that point is >= epsilon.
 *
 * points        the items being clustered
 * currentPoint  index of the point whose neighborhood is wanted
 * epsilon       minimum similarity score for a point to count as a neighbor
 *
 * NOTE(review): whether currentPoint itself is included depends on what
 * similarity returns for a point compared with itself; confirm against
 * similarity.
 */
private static HashSet<Integer> epsilonNeighbors(String[] points, int currentPoint, int epsilon) {
    HashSet<Integer> neighbors = new HashSet<Integer>();
    String protein = points[currentPoint];
    for (int iii = 0; iii < points.length; iii++) {
        // compare every point against the current one (previously referenced an undefined index)
        int score = similarity(points[iii], protein);
        if (score >= epsilon) {
            neighbors.add(iii);
        }
    }
    return neighbors;
}