Java DBSCAN群集算法工作不正常。我做错了什么?

Java DBSCAN群集算法工作不正常。我做错了什么?,java,data-mining,cluster-analysis,dbscan,Java,Data Mining,Cluster Analysis,Dbscan,我正试图编写DBSCAN算法来对一组点进行聚类,但结果非常糟糕。这可能是因为数据,但不仅仅如此。我得到了不应该发生的大小

我正试图编写DBSCAN算法来对一组点进行聚类,但结果非常糟糕。这可能是因为数据,但不仅仅如此。我得到了大小小于 minPts 的簇,这本不应该发生。我做错了什么?我已经检查了很多次代码,但我不知道问题出在哪里

我参考了维基百科上给出的算法

private static int[]dbScan(字符串[]点,intε,int minPts){
int集群=0;
//如果已访问该点,则访问了门店
boolean[]访问=新的boolean[points.length];
//点群集存储点已分配给的群集
int[]pointsCluster=新int[points.length];
对于(int iii=0;iiiminPts
*/
私有静态void expandCluster(字符串[]点、int种子点、哈希集邻居、,
int[]点群集、访问的布尔[]、int群集、intε、int minPts){
pointsCluster[seedPoint]=cluster;//将群集分配给种子点
//创建队列以处理邻居
队列种子=新的LinkedList();
种子。添加所有(邻居);
而(!seeds.isEmpty()){
int currentPoint=(整数)seeds.poll();
如果(!已访问[currentPoint]){
已访问[currentPoint]=true;//将邻居标记为已访问
//获取此点的邻居
HashSet currentNeighbors=ε(点,currentPoint,ε);
//如果currentPoint在邻域中有>=minPts,则将这些点添加到队列中
if(currentNeights.size()>=minPts){
种子。添加所有(当前邻居);
}
}
//如果currentPoint尚未分配群集,请将其分配给当前群集
如果(pointsCluster[currentPoint]=0)pointsCluster[currentPoint]=cluster;
}
}
/*
*返回一个哈希集,其中包含以下点的索引:
*在索引==当前点处点的ε邻域中
*/
私有静态哈希集epsiloneighbors(字符串[]点,int currentPoint,int epsilon){
HashSet邻居=新HashSet();
字符串蛋白质=点[当前点];
对于(int iii=0;iii=ε)相邻。添加(iii);
}
回归邻居;
}

当结果不好时,可能是因为数据不好(对于基于密度的聚类),或者是因为参数不好

事实上,如果簇与簇彼此相接,DBSCAN 可以产生点数少于 minPts 的簇,因为相邻的簇可以互相“窃取”边界点


不妨使用例如 ELKI 这样的现成实现来验证你的算法输出?

另外,建议去阅读原始论文,而不是只看维基百科!哇,你说得对。我没有想到簇之间会互相“窃取”边界点。谢谢。那么,从整体上看,算法本身没有问题,对吧?我没有详细检查。您的
epsiloneighbors
引用未定义的变量
jjj
。还要注意的是,Java集合在处理基本类型时性能非常差。你可能真的想试试ELKI,因为它真的很快。是的,
jjj
应该是
currentPoint
。我会去了解一下 ELKI。谢谢你的帮助。
/**
 * Runs the DBSCAN main loop over {@code points} and returns a label per point.
 *
 * <p>Every point that has not been visited yet is marked visited and its
 * epsilon-neighborhood is fetched. When that neighborhood holds fewer than
 * {@code minPts} entries the point's label is set to {@code -1}; otherwise a
 * new cluster id (ids start at 1) is allocated and {@code expandCluster} is
 * asked to grow the cluster from that point.
 *
 * @param points  the items to cluster
 * @param epsilon neighborhood threshold, passed through to {@code epsilonNeighbors}
 * @param minPts  minimum neighborhood size for a point to start a cluster
 * @return an array parallel to {@code points} holding each point's label
 */
private static int[] dbScan(String[] points, int epsilon, int minPts) {
    final int total = points.length;
    // processed[i] is true once point i has had its neighborhood examined
    final boolean[] processed = new boolean[total];
    // labels[i] is the label assigned to point i (0 until something writes it)
    final int[] labels = new int[total];
    int clusterId = 0;

    for (int idx = 0; idx < total; idx++) {
        if (!processed[idx]) {
            processed[idx] = true;
            HashSet<Integer> nearby = epsilonNeighbors(points, idx, epsilon);
            boolean denseEnough = nearby.size() >= minPts;
            if (denseEnough) {
                // dense neighborhood: open the next cluster and grow it from here
                clusterId += 1;
                expandCluster(points, idx, nearby, labels, processed, clusterId, epsilon, minPts);
            } else {
                // sparse neighborhood: label as noise
                labels[idx] = -1;
            }
        }
    }
    return labels;
}

/**
 * Grows {@code cluster} outward from {@code seedPoint}.
 *
 * <p>The seed is assigned to the cluster, then its neighbors are processed
 * breadth-first. Each unvisited neighbor is marked visited and, if its own
 * epsilon-neighborhood contains at least {@code minPts} points, that
 * neighborhood is queued as well. Every processed point that does not yet
 * belong to a cluster is assigned to {@code cluster}.
 *
 * <p>Labels in {@code pointsCluster} follow the convention used by
 * {@code dbScan}: {@code 0} means unassigned, {@code -1} means noise, and
 * positive values are cluster ids. A point previously labeled noise is
 * claimed here when it is reachable from the cluster, i.e. it becomes a
 * border point instead of staying noise.
 *
 * @param points        the items being clustered
 * @param seedPoint     index of the point that starts the cluster
 * @param neighbors     indexes in the epsilon-neighborhood of {@code seedPoint}
 * @param pointsCluster per-point labels; updated in place
 * @param visited       per-point visited flags; updated in place
 * @param cluster       id of the cluster being grown
 * @param epsilon       neighborhood threshold, passed to {@code epsilonNeighbors}
 * @param minPts        minimum neighborhood size for a point to be expanded
 */
private static void expandCluster(String[] points, int seedPoint, HashSet<Integer> neighbors,
        int[] pointsCluster, boolean[] visited, int cluster, int epsilon, int minPts) {

    pointsCluster[seedPoint] = cluster;     // assign cluster to seed point
    // queue of points still to be processed for this cluster
    Queue<Integer> seeds = new LinkedList<Integer>(neighbors);
    while (!seeds.isEmpty()) {
        int currentPoint = seeds.poll();
        if (!visited[currentPoint]) {
            visited[currentPoint] = true;       // mark neighbor as visited
            HashSet<Integer> currentNeighbors = epsilonNeighbors(points, currentPoint, epsilon);
            // only points with a dense enough neighborhood extend the frontier
            if (currentNeighbors.size() >= minPts) {
                seeds.addAll(currentNeighbors);
            }
        }
        // Claim the point unless it already belongs to a cluster. Noise (-1)
        // must be claimed too: the point was visited before this cluster
        // existed, but it is reachable from it, so it is a border point.
        if (pointsCluster[currentPoint] <= 0) {
            pointsCluster[currentPoint] = cluster;
        }
    }
}

/**
 * Returns the indexes of all points in the epsilon-neighborhood of the point
 * at index {@code currentPoint}.
 *
 * <p>A point counts as a neighbor when {@code similarity} between it and the
 * current point is at least {@code epsilon}. Every index is tested, including
 * {@code currentPoint} itself; whether it ends up in the result depends on
 * what {@code similarity} returns for identical arguments (defined elsewhere
 * — not verifiable from this block).
 *
 * @param points       the items being clustered
 * @param currentPoint index of the point whose neighborhood is wanted
 * @param epsilon      minimum similarity score for two points to be neighbors
 * @return the set of neighbor indexes (possibly empty, never {@code null})
 */
private static HashSet<Integer> epsilonNeighbors(String[] points, int currentPoint, int epsilon) {
    HashSet<Integer> neighbors = new HashSet<Integer>();
    String protein = points[currentPoint];
    for (int iii = 0; iii < points.length; iii++) {
        // compare every candidate against the current point
        int score = similarity(points[iii], protein);
        if (score >= epsilon) {
            neighbors.add(iii);
        }
    }
    return neighbors;
}