Warning: file_get_contents(/data/phpspider/zhask/data//catemap/3/arrays/12.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C 不必声明一个巨大的2D数组,找到平均大小分布的有效方法是什么?_C_Arrays_Multidimensional Array_Average - Fatal编程技术网

C 不必声明一个巨大的2D数组,找到平均大小分布的有效方法是什么?

C 不必声明一个巨大的2D数组,找到平均大小分布的有效方法是什么?,c,arrays,multidimensional-array,average,C,Arrays,Multidimensional Array,Average,考虑一个lxlsize矩阵M,其条目可以是0或1。每个元素都是概率为p的1和概率为1-p的0。我将标记为1的元素称为黑色元素,标记为0的元素称为白色元素。我正在尝试编写一个代码: 生成包含条目0和1的随机矩阵。我需要输入矩阵L的大小和概率p 用相同的编号标记属于同一簇的所有黑色元素。(将黑色元素簇定义为单元格图中的最大连接组件,值为1,其中边连接行和列最多相差1的单元格(因此每个单元格最多有八个相邻单元格)换言之,如果矩阵中的两个黑色元素共享一个边或顶点,则认为它们属于同一个黑色簇。也就是说,

考虑一个lxlsize矩阵M,其条目可以是0或1。每个元素都是概率为p的1和概率为1-p的0。我将标记为1的元素称为黑色元素,标记为0的元素称为白色元素。我正在尝试编写一个代码:

  • 生成包含条目0和1的随机矩阵。我需要输入矩阵L的大小和概率p

  • 用相同的编号标记属于同一簇的所有黑色元素。(将黑色元素簇定义为单元格图中的最大连接组件,值为1,其中边连接行和列最多相差1的单元格(因此每个单元格最多有八个相邻单元格)换言之,如果矩阵中的两个黑色元素共享一个边或顶点,则认为它们属于同一个黑色簇。也就是说,把矩阵看成是一个大的方块,而元素则是大方格中的小方格。)

  • 在一个从p=0到p=100的循环中(我指的是概率p的百分比):每个大小的黑色簇的数量被写入一个与p值对应的文件中
例如:

输入:p=30,L=50

输出(针对每个p写入数据文件;因此该程序创建了101个数据文件,从p=0到p=100):

1100(即有100个大小为1的黑色簇)

2 20(即有20个大小为2的黑色簇)

3 15(即有15个大小为3的黑色簇)

等等

#include "stdafx.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <string.h>
int *labels;
int  n_labels = 0;

int uf_find(int x) {
    int y = x;
    while (labels[y] != y)
        y = labels[y];

    while (labels[x] != x) {
        int z = labels[x];
        labels[x] = y;
        x = z;
    }
    return y;
}

int uf_union(int x, int y) {
    return labels[uf_find(x)] = uf_find(y);
}

int uf_make_set(void) {
    labels[0] ++;
    assert(labels[0] < n_labels);
    labels[labels[0]] = labels[0];
    return labels[0];
}

void uf_initialize(int max_labels) {
    n_labels = max_labels;
    labels = (int*)calloc(sizeof(int), n_labels);
    labels[0] = 0;
}

void uf_done(void) {
    free(labels);
    labels = 0;
}

#define max(a,b) (a>b?a:b)
#define min(a,b) (a>b?b:a)

int hoshen_kopelman(int **matrix, int m, int n) {

    uf_initialize(m * n / 2);

    for (int i = 0; i<m; i++)
        for (int j = 0; j<n; j++)
            if (matrix[i][j]) {

                int up = (i == 0 ? 0 : matrix[i - 1][j]);
                int left = (j == 0 ? 0 : matrix[i][j - 1]);

                switch (!!up + !!left) {

                case 0:
                    matrix[i][j] = uf_make_set();
                    break;

                case 1:
                    matrix[i][j] = max(up, left);
                    break;

                case 2:
                    matrix[i][j] = uf_union(up, left);
                    break;
                }
                int north_west;
                if (i == 0 || j == 0)
                    north_west = 0;
                else
                    north_west = matrix[i - 1][j - 1];

                int north_east;
                if (i == 0 || j == (n - 1))
                    north_east = 0;
                else
                    north_east = matrix[i - 1][j + 1];

                if (!!north_west == 1)
                {
                    if (i != 0 && j != 0)
                    {
                        //if (rand() % 2 == 1)
                        //{
                            if (matrix[i][j - 1] == 0 && matrix[i - 1][j] == 0)
                            {
                                if (!!matrix[i][j] == 0)
                                    matrix[i][j] = north_west;
                                else
                                    uf_union(north_west, matrix[i][j]);
                            }
                        //}

                    }
                    else if (i == 0 || j == 0)
                    {

                    }

                }
                if (!!north_east == 1)
                {
                    if (i != 0 && j != n - 1)
                    {
                        //if (rand() % 2 == 1)
                        //{
                            if (matrix[i - 1][j] == 0 && matrix[i][j + 1] == 0)
                            {
                                if (!!matrix[i][j] == 0)
                                    matrix[i][j] = north_east;
                                else
                                    uf_union(north_east, matrix[i][j]);
                            }
                        //}
                    }
                    else if (i == 0 || j == n - 1)
                    {

                    }
                }
            }
    int *new_labels = (int*)calloc(sizeof(int), n_labels);

    for (int i = 0; i<m; i++)
        for (int j = 0; j<n; j++)
            if (matrix[i][j]) {
                int x = uf_find(matrix[i][j]);
                if (new_labels[x] == 0) {
                    new_labels[0]++;
                    new_labels[x] = new_labels[0];
                }
                matrix[i][j] = new_labels[x];
            }

    int total_clusters = new_labels[0];

    free(new_labels);
    uf_done();

    return total_clusters;
}

void check_labelling(int **matrix, int m, int n) {
    int N, S, E, W;
    for (int i = 0; i<m; i++)
        for (int j = 0; j<n; j++)
            if (matrix[i][j]) {
                N = (i == 0 ? 0 : matrix[i - 1][j]);
                S = (i == m - 1 ? 0 : matrix[i + 1][j]);
                E = (j == n - 1 ? 0 : matrix[i][j + 1]);
                W = (j == 0 ? 0 : matrix[i][j - 1]);

                assert(N == 0 || matrix[i][j] == N);
                assert(S == 0 || matrix[i][j] == S);
                assert(E == 0 || matrix[i][j] == E);
                assert(W == 0 || matrix[i][j] == W);
            }
}

int cluster_count(int **matrix, int size, int N)
{
    int i;
    int j;
    int count = 0;
    for (i = 0; i < size; i++)
    {
        for (j = 0; j < size; j++)
        {
            if (matrix[i][j] == N)
                count++;
        }
    }
    return count;
}

int main(int argc, char **argv)
{
    srand((unsigned int)time(0));
    int p = 0;
    printf("Enter number of rows/columns: ");
    int size = 0;
    scanf("%d", &size);
    printf("\n");
    FILE *fp;

    printf("Enter number of averaging iterations: ");
    int iterations = 0;

    scanf("%d", &iterations);

        for (int p = 0; p <= 100; p++)
        { 
            char str[100];
            sprintf(str, "BlackSizeDistribution%03i.txt", p);
            fp = fopen(str, "w");
            int **matrix;
            matrix = (int**)calloc(10, sizeof(int*));
            int** matrix_new = (int**)calloc(10, sizeof(int*));
            matrix_new = (int **)realloc(matrix, sizeof(int*) * size);
            matrix = matrix_new;
        for (int i = 0; i < size; i++)
        {
            matrix[i] = (int *)calloc(size, sizeof(int));
            for (int j = 0; j < size; j++)
            {
                int z = rand() % 100;
                z = z + 1;
                if (p == 0)
                    matrix[i][j] = 0;
                if (z <= p)
                    matrix[i][j] = 1;
                else
                    matrix[i][j] = 0;
            }
        }

        hoshen_kopelman(matrix, size, size);
        int highest = matrix[0][0];
        for (int i = 0; i < size; i++)
            for (int j = 0; j < size; j++)
                if (highest < matrix[i][j])
                    highest = matrix[i][j];
        int* counter = (int*)calloc(sizeof(int*), highest + 1);
        int high = 0;
        for (int k = 1; k <= highest; k++)
        {
            counter[k] = cluster_count(matrix, size, k);
            if (counter[k] > high)
                high = counter[k];
        }
        int* size_distribution = (int*)calloc(sizeof(int*), high + 1);

        for (int y = 1; y <= high; y++)
        {
           int count = 0;
           for (int z = 1; z <= highest; z++)
               if (counter[z] == y)
                   count++;
           size_distribution[y] = count;
        }

        for (int k = 1; k <= high; k++)
        {
            fprintf(fp, "\n%d\t%d", k, size_distribution[k]);
            printf("\n%d\t%d", k, size_distribution[k]);
        }
        printf("\n");
        check_labelling(matrix, size, size);
        for (int i = 0; i < size; i++)
            free(matrix[i]);
        free(matrix);
        }
}
我想做一些类似的事情,只是我需要在50或100次迭代中进行平均,以获得我的数学模型的准确值。顺便说一下,我并不需要为每个迭代输出值,即迭代1、迭代2、迭代3。。。在数据文件中。我只需要“打印”上表中的第一列和最后一列(具有平均值)

现在,我可能需要修改主函数来进行平均

以下是主要功能:

int main(int argc, char **argv)
    {
        srand((unsigned int)time(0));
        int p = 0;
        printf("Enter number of rows/columns: ");
        int size = 0;
        scanf("%d", &size);
        printf("\n");
        FILE *fp;

        printf("Enter number of averaging iterations: ");
        int iterations = 0;

        scanf("%d", &iterations);

            for (int p = 0; p <= 100; p++)
            { 
                char str[100];
                sprintf(str, "BlackSizeDistribution%03i.txt", p);
                fp = fopen(str, "w");
                int **matrix;
                matrix = (int**)calloc(10, sizeof(int*));
                int** matrix_new = (int**)calloc(10, sizeof(int*));
                matrix_new = (int **)realloc(matrix, sizeof(int*) * size);
                matrix = matrix_new;
            for (int i = 0; i < size; i++)
            {
                matrix[i] = (int *)calloc(size, sizeof(int));
                for (int j = 0; j < size; j++)
                {
                    int z = rand() % 100;
                    z = z + 1;
                    if (p == 0)
                        matrix[i][j] = 0;
                    if (z <= p)
                        matrix[i][j] = 1;
                    else
                        matrix[i][j] = 0;
                }
            }

            hoshen_kopelman(matrix, size, size);
            int highest = matrix[0][0];
            for (int i = 0; i < size; i++)
                for (int j = 0; j < size; j++)
                    if (highest < matrix[i][j])
                        highest = matrix[i][j];
            int* counter = (int*)calloc(sizeof(int*), highest + 1);
            int high = 0;
            for (int k = 1; k <= highest; k++)
            {
                counter[k] = cluster_count(matrix, size, k);
                if (counter[k] > high)
                    high = counter[k];
            }
            int* size_distribution = (int*)calloc(sizeof(int*), high + 1);

            for (int y = 1; y <= high; y++)
            {
               int count = 0;
               for (int z = 1; z <= highest; z++)
                   if (counter[z] == y)
                       count++;
               size_distribution[y] = count;
            }

            for (int k = 1; k <= high; k++)
            {
                fprintf(fp, "\n%d\t%d", k, size_distribution[k]);
                printf("\n%d\t%d", k, size_distribution[k]);
            }
            printf("\n");
            check_labelling(matrix, size, size);
            for (int i = 0; i < size; i++)
                free(matrix[i]);
            free(matrix);
            }
    }
int main(int argc,char**argv)
{
srand((无符号整数)时间(0));
int p=0;
printf(“输入行/列数:”);
int size=0;
scanf(“%d”,大小(&S);
printf(“\n”);
文件*fp;
printf(“输入平均迭代次数:”);
int迭代次数=0;
scanf(“%d”,迭代次数(&d));

因为(intp=0;p啊,一个非常贴近我内心的话题!)

因为您正在收集统计信息,并且确切的配置仅在收集所述统计信息的持续时间内才有意义,所以您只需要两个L×L矩阵:一个用于保存颜色信息(一个白色,0到L×L黑色,因此一个类型可以保存0到L2+1之间的整数,包括0和L2+1);另一个用于保存该类型元素的数量(0和L2之间的整数,包括0和L2)

要在多次迭代中为每个L和p值保存每个大小的黑色簇的数量,您需要第三个L×L+1元素数组;但请注意,如果使用相同的L和p进行N次迭代,其值可以达到N×L×L

标准C伪随机数生成器非常糟糕;不要使用它。请使用或我最喜欢的(或相关的Xorshift1024*)。虽然Xorshift64*非常快,但它的分布对于像您这样的模拟来说足够“随机”

与其只存储“黑色”或“白色”,不如存储“无趣”或群集ID;最初,所有(未连接的)黑色都有一个唯一的群集ID。这样,您就可以实现一个群集,并将连接的单元格高效地连接到群集

下面是我将如何用伪代码实现这一点:

Let id[L*L]        be the color information; index is L*row+col
Let idcount[L*L+1] be the id count over one iteration
Let count[L*L+1]   be the number of clusters across N iterations

Let limit = (p / 100.0) * PRNG_MAX

Let white = L*L    (and blacks are 0 to L*L-1, inclusive)
Clear count[] to all zeros

For iter = 1 to N, inclusive:

    For row = 0 to L-1, inclusive:

        If PRNG() <= limit:
            Let id[L*row] = L*row
        Else:
            Let id[L*row] = white
        End If

        For col = 1 to L-1, inclusive:
            If PRNG() <= limit:
                If id[L*row+col-1] == white:
                    Let id[L*row+col] = L*row + col
                Else:
                    Let id[L*row+col] = id[L*row+col-1]
                End If
            Else:
                Let id[L*row+col] = white
            End If
        End For

    End For
当然,您应该组合循环,以保持尽可能好的访问位置。(分别在一个单独的内部循环中执行
col==0
,并将
row==0
,最小化
if
子句,因为它们往往比较慢。)您甚至可以创建
id
数组(L+1)尺寸为2,用白色填充单元格的外边缘,以简化上述连接,使其成为一个单双环,成本为4L+4个额外使用的单元格

此时,您需要展平每个不相交的集合。每个非
白色的ID值都是
L*row+col
(意思是“this”),或者是对另一个单元格的引用。展平意味着对于每个ID,我们在链中找到最终ID,并使用它:

    For i = 0 to L*L-1, inclusive:
        If (id[i] != white):
            Let k = i
            While (id[k] != k):
                k = id[k]
            End While
            id[i] = k
        End If
    End For
接下来,我们需要计算具有特定ID的单元数:

    Clear idn[]

    For i = 0 to L*L-1, inclusive:
        Increment idn[id[i]]
    End For
因为我们对N次迭代中特定大小的集群数量感兴趣,所以我们可以简单地通过在该迭代中找到的每个特定大小的集群更新
计数
数组:

    For i = 1 to L*L, inclusive:
        Increment count[idn[i]]
    End For
    Let count[0] = 0

End For
此时,
count[i]
包含在
N
迭代中发现的
i
单元格的黑色簇数;换句话说,它是在N次迭代中看到的簇大小的直方图(离散分布)


上述措施的一种实施方式如下

(第一个实现有数组大小和簇标签的问题;这个版本将这个部分拆分成一个单独的文件,并且更容易验证正确性。但是,这只是第二个版本,所以我不认为它是生产质量)。 首先,操作矩阵的函数,cluster.h

#ifndef   CLUSTER_H
#define   CLUSTER_H
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include <time.h>

/*
   This is in the public domain.
   Written by Nominal Animal <question@nominal-animal.net>
*/

typedef  uint32_t  cluster_label;
typedef  uint32_t  cluster_size;

static size_t  rows = 0;    /* Number of mutable rows */
static size_t  cols = 0;    /* Number of mutable columns */
static size_t  nrows = 0;   /* Number of accessible rows == rows + 1 */
static size_t  ncols = 0;   /* Number of accessible columns == cols + 1 */
static size_t  cells = 0;   /* Total number of cells == nrows*ncols */
static size_t  empty = 0;   /* Highest-numbered label == nrows*ncols - 1 */
static size_t  sizes = 0;   /* Number of possible cluster sizes == rows*cols + 1 */
#define  INDEX(row, col)  (ncols*(row) + (col))

cluster_label   *label  = NULL;  /* 2D contents: label[ncols*(row) + (col)] */
cluster_size    *occurs = NULL;  /* Number of occurrences of each label value */

/* Xorshift64* PRNG used for generating the matrix.
*/
static uint64_t  prng_state = 1;

static inline uint64_t  prng_u64(void)
{
    uint64_t  state = prng_state;
    state ^= state >> 12;
    state ^= state << 25;
    state ^= state >> 27;
    prng_state = state;
    return state * UINT64_C(2685821657736338717);
}

static inline uint64_t  prng_u64_less(void)
{
    uint64_t  result;
    do {
        result = prng_u64();
    } while (result == UINT64_C(18446744073709551615));
    return result;
}

static inline uint64_t  prng_seed(const uint64_t  seed)
{
    if (seed)
        prng_state = seed;
    else
        prng_state = 1;

    return prng_state;
}

static inline uint64_t  prng_randomize(void)
{
    int       discard = 1024;
    uint64_t  seed;
    FILE     *in;

    /* By default, use time. */
    prng_seed( ((uint64_t)time(NULL) * 2832631) ^
               ((uint64_t)clock() * 1120939) );

#ifdef __linux__
    /* On Linux, read from /dev/urandom. */
    in = fopen("/dev/urandom", "r");
    if (in) {
        if (fread(&seed, sizeof seed, 1, in) == 1)
            prng_seed(seed);
        fclose(in);        
    }
#endif

    /* Churn the state a bit. */
    seed = prng_state;
    while (discard-->0) {
        seed ^= seed >> 12;
        seed ^= seed << 25;
        seed ^= seed >> 27;
    }
    return prng_state = seed;
}




static inline void cluster_free(void)
{
    free(occurs); occurs = NULL;
    free(label);  label = NULL;
    rows = 0;
    cols = 0;
    nrows = 0;
    ncols = 0;
    cells = 0;
    empty = 0;
    sizes = 0;
}


static int cluster_init(const size_t want_cols, const size_t want_rows)
{
    cluster_free();

    if (want_cols < 1 || want_rows < 1)
        return -1; /* Invalid number of rows or columns */

    rows = want_rows;
    cols = want_cols;

    nrows = rows + 1;
    ncols = cols + 1;

    cells = nrows * ncols;

    if ((size_t)(cells / nrows) != ncols ||
        nrows <= rows || ncols <= cols)
        return -1; /* Size overflows */

    empty = nrows * ncols - 1;

    sizes = rows * cols + 1;

    label  = calloc(cells, sizeof label[0]);
    occurs = calloc(cells, sizeof occurs[0]);
    if (!label || !occurs) {
        cluster_free();
        return -1; /* Not enough memory. */
    }

    return 0;
}


static void join2(size_t i1, size_t i2)
{
    size_t  root1 = i1, root2 = i2, root;

    while (root1 != label[root1])
        root1 = label[root1];

    while (root2 != label[root2])
        root2 = label[root2];

    root = root1;
    if (root > root2)
        root = root2;

    while (label[i1] != root) {
        const size_t  temp = label[i1];
        label[i1] = root;
        i1 = temp;
    }

    while (label[i2] != root) {
        const size_t  temp = label[i2];
        label[i2] = root;
        i2 = temp;
    }
}

static void join3(size_t i1, size_t i2, size_t i3)
{
    size_t  root1 = i1, root2 = i2, root3 = i3, root;

    while (root1 != label[root1])
        root1 = label[root1];

    while (root2 != label[root2])
        root2 = label[root2];

    while (root3 != label[root3])
        root3 = label[root3];

    root = root1;
    if (root > root2)
        root = root2;
    if (root > root3)
        root = root3;

    while (label[i1] != root) {
        const size_t  temp = label[i1];
        label[i1] = root;
        i1 = temp;
    }

    while (label[i2] != root) {
        const size_t  temp = label[i2];
        label[i2] = root;
        i2 = temp;
    }

    while (label[i3] != root) {
        const size_t  temp = label[i3];
        label[i3] = root;
        i3 = temp;
    }
}

static void join4(size_t i1, size_t i2, size_t i3, size_t i4)
{
    size_t  root1 = i1, root2 = i2, root3 = i3, root4 = i4, root;

    while (root1 != label[root1])
        root1 = label[root1];

    while (root2 != label[root2])
        root2 = label[root2];

    while (root3 != label[root3])
        root3 = label[root3];

    while (root4 != label[root4])
        root4 = label[root4];

    root = root1;
    if (root > root2)
        root = root2;
    if (root > root3)
        root = root3;
    if (root > root4)
        root = root4;

    while (label[i1] != root) {
        const size_t  temp = label[i1];
        label[i1] = root;
        i1 = temp;
    }

    while (label[i2] != root) {
        const size_t  temp = label[i2];
        label[i2] = root;
        i2 = temp;
    }

    while (label[i3] != root) {
        const size_t  temp = label[i3];
        label[i3] = root;
        i3 = temp;
    }

    while (label[i4] != root) {
        const size_t  temp = label[i4];
        label[i4] = root;
        i4 = temp;
    }
}

static void cluster_fill(const uint64_t plimit)
{
    size_t  r, c;

    /* Fill the matrix with the specified probability.
       For efficiency, we use the same label for all
       horizontally consecutive cells.
    */
    for (r = 0; r < rows; r++) {
        const size_t  imax = ncols*r + cols;
        cluster_label last;
        size_t        i = ncols*r;

        last = (prng_u64_less() < plimit) ? i : empty;
        label[i] = last;

        while (++i < imax) {
            if (prng_u64_less() < plimit) {
                if (last == empty)
                    last = i;
            } else
                last = empty;

            label[i] = last;
        }

        label[imax] = empty;
    }

    /* Fill the extra row with empty, too. */
    {
        cluster_label       *ptr = label + ncols*rows;
        cluster_label *const end = label + ncols*nrows;
        while (ptr < end)
            *(ptr++) = empty;
    }

    /* On the very first row, we need to join non-empty cells
       vertically and diagonally down. */
    for (c = 0; c < cols; c++)
        switch ( ((label[c]         < empty) << 0) |
                 ((label[c+ncols]   < empty) << 1) |
                 ((label[c+ncols+1] < empty) << 2) ) {
            /*  <1>    *
             *   2  4  */
        case 3: /* Join down */
            join2(c, c+ncols); break;
        case 5: /* Join down right */
            join2(c, c+ncols+1); break;
        case 7: /* Join down and down right */
            join3(c, c+ncols, c+ncols+1); break;
        }

    /* On the other rows, we need to join non-empty cells
       vertically, diagonally down, and diagonally up. */
    for (r = 1; r < rows; r++) {
        const size_t  imax = ncols*r + cols;
        size_t        i;
        for (i = ncols*r; i < imax; i++)
            switch ( ((label[i]         < empty) << 0) |
                     ((label[i-ncols+1] < empty) << 1) |
                     ((label[i+ncols]   < empty) << 2) |
                     ((label[i+ncols+1] < empty) << 3) ) {
            /*      2  *
             *  <1>    *
             *   4  8  */
            case 3: /* Diagonally up */
                join2(i, i-ncols+1); break;
            case 5: /* Down */
                join2(i, i+ncols); break;
            case 7: /* Down and diagonally up */
                join3(i, i-ncols+1, i+ncols); break;
            case 9: /* Diagonally down */
                join2(i, i+ncols+1); break;
            case 11: /* Diagonally up and diagonally down */
                join3(i, i-ncols+1, i+ncols+1); break;
            case 13: /* Down and diagonally down */
                join3(i, i+ncols, i+ncols+1); break;
            case 15: /* Down, diagonally down, and diagonally up */
                join4(i, i-ncols+1, i+ncols, i+ncols+1); break;
            }
    }

    /* Flatten the labels, so that all cells belonging to the
       same cluster will have the same label. */
    {
        const size_t  imax = ncols*rows;
        size_t        i;
        for (i = 0; i < imax; i++)
            if (label[i] < empty) {
                size_t  k = i, kroot = i;

                while (kroot != label[kroot])
                    kroot = label[kroot];

                while (label[k] != kroot) {
                    const size_t  temp = label[k];
                    label[k] = kroot;
                    k = temp;
                }
            }
    }

    /* Count the number of occurrences of each label. */
    memset(occurs, 0, (empty + 1) * sizeof occurs[0]);
    {
        cluster_label *const end = label + ncols*rows;
        cluster_label       *ptr = label;

        while (ptr < end)
            ++occurs[*(ptr++)];
    }
}

#endif /* CLUSTER_H */
#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include "cluster.h"

/*
   This program is in the public domain.
   Written by Nominal Animal <question@nominal-animal.net>
*/

int main(int argc, char *argv[])
{
    uint64_t       plimit;
    uint32_t      *color;
    unsigned long  size, r, c;
    double         probability;
    char           dummy;

    if (argc != 3) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s SIZE PROBABILITY > matrix.ppm\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "Where  SIZE         sets the matrix size (SIZE x SIZE),\n");
        fprintf(stderr, "       PROBABILITY  is the probability [0, 1] of a cell\n");
        fprintf(stderr, "                    being non-empty.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) {
        fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]);
        return EXIT_FAILURE;
    }

    if (sscanf(argv[2], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) {
        fprintf(stderr, "%s: Invalid probability.\n", argv[2]);
        return EXIT_FAILURE;
    }

    /* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p),
       but doubles do not have the precision for that. */
    if (probability > 0.9999999999999999)
        plimit = UINT64_C(18446744073709551615);
    else
    if (probability <= 0)
        plimit = UINT64_C(0);
    else
        plimit = (uint64_t)(18446744073709551616.0 * probability);

    /* Try allocating memory for the matrix and the label count array. */
    if (size > 2147483646u || cluster_init(size, size)) {
        fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
        return EXIT_FAILURE;
    }

    /* Try allocating memory for the color lookup array. */
    color = calloc(empty + 1, sizeof color[0]);
    if (!color) {
        fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
        return EXIT_FAILURE;
    }

    fprintf(stderr, "Using Xorshift64* seed %" PRIu64 "\n", prng_randomize());
    fflush(stderr);

    /* Construct the matrix. */
    cluster_fill(plimit);

    {
        size_t  i;

        color[empty] = 0xFFFFFFu;

        /* Assign random colors. */
        for (i = 0; i < empty; i++)
            color[i] = prng_u64() >> 40;
    }

    printf("P6\n%lu %lu 255\n", size, size);
    for (r = 0; r < rows; r++)
        for (c = 0; c < cols; c++) {
            const uint32_t  rgb = color[label[ncols*r + c]];
            fputc((rgb >> 16) & 255, stdout);
            fputc((rgb >> 8)  & 255, stdout);
            fputc( rgb        & 255, stdout);
        }
    fflush(stdout);

    fprintf(stderr, "Done.\n");

    return EXIT_SUCCESS;
}
程序以Gnuplot兼容的格式输出集群大小分布(直方图),从几个注释行开始,以
#
开头,以指示用于计算结果的确切参数

输出中的第一列是集群大小(集群中的单元数),第二列是迭代期间发现的此类集群的精确计数,第三列是observ
#ifndef   CLUSTER_H
#define   CLUSTER_H
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include <time.h>

/*
   This is in the public domain.
   Written by Nominal Animal <question@nominal-animal.net>
*/

typedef  uint32_t  cluster_label;
typedef  uint32_t  cluster_size;

static size_t  rows = 0;    /* Number of mutable rows */
static size_t  cols = 0;    /* Number of mutable columns */
static size_t  nrows = 0;   /* Number of accessible rows == rows + 1 */
static size_t  ncols = 0;   /* Number of accessible columns == cols + 1 */
static size_t  cells = 0;   /* Total number of cells == nrows*ncols */
static size_t  empty = 0;   /* Highest-numbered label == nrows*ncols - 1 */
static size_t  sizes = 0;   /* Number of possible cluster sizes == rows*cols + 1 */
#define  INDEX(row, col)  (ncols*(row) + (col))

cluster_label   *label  = NULL;  /* 2D contents: label[ncols*(row) + (col)] */
cluster_size    *occurs = NULL;  /* Number of occurrences of each label value */

/* Xorshift64* PRNG used for generating the matrix.
*/
static uint64_t  prng_state = 1;

static inline uint64_t  prng_u64(void)
{
    uint64_t  state = prng_state;
    state ^= state >> 12;
    state ^= state << 25;
    state ^= state >> 27;
    prng_state = state;
    return state * UINT64_C(2685821657736338717);
}

static inline uint64_t  prng_u64_less(void)
{
    uint64_t  result;
    do {
        result = prng_u64();
    } while (result == UINT64_C(18446744073709551615));
    return result;
}

static inline uint64_t  prng_seed(const uint64_t  seed)
{
    if (seed)
        prng_state = seed;
    else
        prng_state = 1;

    return prng_state;
}

static inline uint64_t  prng_randomize(void)
{
    int       discard = 1024;
    uint64_t  seed;
    FILE     *in;

    /* By default, use time. */
    prng_seed( ((uint64_t)time(NULL) * 2832631) ^
               ((uint64_t)clock() * 1120939) );

#ifdef __linux__
    /* On Linux, read from /dev/urandom. */
    in = fopen("/dev/urandom", "r");
    if (in) {
        if (fread(&seed, sizeof seed, 1, in) == 1)
            prng_seed(seed);
        fclose(in);        
    }
#endif

    /* Churn the state a bit. */
    seed = prng_state;
    while (discard-->0) {
        seed ^= seed >> 12;
        seed ^= seed << 25;
        seed ^= seed >> 27;
    }
    return prng_state = seed;
}




static inline void cluster_free(void)
{
    free(occurs); occurs = NULL;
    free(label);  label = NULL;
    rows = 0;
    cols = 0;
    nrows = 0;
    ncols = 0;
    cells = 0;
    empty = 0;
    sizes = 0;
}


static int cluster_init(const size_t want_cols, const size_t want_rows)
{
    cluster_free();

    if (want_cols < 1 || want_rows < 1)
        return -1; /* Invalid number of rows or columns */

    rows = want_rows;
    cols = want_cols;

    nrows = rows + 1;
    ncols = cols + 1;

    cells = nrows * ncols;

    if ((size_t)(cells / nrows) != ncols ||
        nrows <= rows || ncols <= cols)
        return -1; /* Size overflows */

    empty = nrows * ncols - 1;

    sizes = rows * cols + 1;

    label  = calloc(cells, sizeof label[0]);
    occurs = calloc(cells, sizeof occurs[0]);
    if (!label || !occurs) {
        cluster_free();
        return -1; /* Not enough memory. */
    }

    return 0;
}


static void join2(size_t i1, size_t i2)
{
    size_t  root1 = i1, root2 = i2, root;

    while (root1 != label[root1])
        root1 = label[root1];

    while (root2 != label[root2])
        root2 = label[root2];

    root = root1;
    if (root > root2)
        root = root2;

    while (label[i1] != root) {
        const size_t  temp = label[i1];
        label[i1] = root;
        i1 = temp;
    }

    while (label[i2] != root) {
        const size_t  temp = label[i2];
        label[i2] = root;
        i2 = temp;
    }
}

static void join3(size_t i1, size_t i2, size_t i3)
{
    size_t  root1 = i1, root2 = i2, root3 = i3, root;

    while (root1 != label[root1])
        root1 = label[root1];

    while (root2 != label[root2])
        root2 = label[root2];

    while (root3 != label[root3])
        root3 = label[root3];

    root = root1;
    if (root > root2)
        root = root2;
    if (root > root3)
        root = root3;

    while (label[i1] != root) {
        const size_t  temp = label[i1];
        label[i1] = root;
        i1 = temp;
    }

    while (label[i2] != root) {
        const size_t  temp = label[i2];
        label[i2] = root;
        i2 = temp;
    }

    while (label[i3] != root) {
        const size_t  temp = label[i3];
        label[i3] = root;
        i3 = temp;
    }
}

static void join4(size_t i1, size_t i2, size_t i3, size_t i4)
{
    size_t  root1 = i1, root2 = i2, root3 = i3, root4 = i4, root;

    while (root1 != label[root1])
        root1 = label[root1];

    while (root2 != label[root2])
        root2 = label[root2];

    while (root3 != label[root3])
        root3 = label[root3];

    while (root4 != label[root4])
        root4 = label[root4];

    root = root1;
    if (root > root2)
        root = root2;
    if (root > root3)
        root = root3;
    if (root > root4)
        root = root4;

    while (label[i1] != root) {
        const size_t  temp = label[i1];
        label[i1] = root;
        i1 = temp;
    }

    while (label[i2] != root) {
        const size_t  temp = label[i2];
        label[i2] = root;
        i2 = temp;
    }

    while (label[i3] != root) {
        const size_t  temp = label[i3];
        label[i3] = root;
        i3 = temp;
    }

    while (label[i4] != root) {
        const size_t  temp = label[i4];
        label[i4] = root;
        i4 = temp;
    }
}

static void cluster_fill(const uint64_t plimit)
{
    size_t  r, c;

    /* Fill the matrix with the specified probability.
       For efficiency, we use the same label for all
       horizontally consecutive cells.
    */
    for (r = 0; r < rows; r++) {
        const size_t  imax = ncols*r + cols;
        cluster_label last;
        size_t        i = ncols*r;

        last = (prng_u64_less() < plimit) ? i : empty;
        label[i] = last;

        while (++i < imax) {
            if (prng_u64_less() < plimit) {
                if (last == empty)
                    last = i;
            } else
                last = empty;

            label[i] = last;
        }

        label[imax] = empty;
    }

    /* Fill the extra row with empty, too. */
    {
        cluster_label       *ptr = label + ncols*rows;
        cluster_label *const end = label + ncols*nrows;
        while (ptr < end)
            *(ptr++) = empty;
    }

    /* On the very first row, we need to join non-empty cells
       vertically and diagonally down. */
    for (c = 0; c < cols; c++)
        switch ( ((label[c]         < empty) << 0) |
                 ((label[c+ncols]   < empty) << 1) |
                 ((label[c+ncols+1] < empty) << 2) ) {
            /*  <1>    *
             *   2  4  */
        case 3: /* Join down */
            join2(c, c+ncols); break;
        case 5: /* Join down right */
            join2(c, c+ncols+1); break;
        case 7: /* Join down and down right */
            join3(c, c+ncols, c+ncols+1); break;
        }

    /* On the other rows, we need to join non-empty cells
       vertically, diagonally down, and diagonally up. */
    for (r = 1; r < rows; r++) {
        const size_t  imax = ncols*r + cols;
        size_t        i;
        for (i = ncols*r; i < imax; i++)
            switch ( ((label[i]         < empty) << 0) |
                     ((label[i-ncols+1] < empty) << 1) |
                     ((label[i+ncols]   < empty) << 2) |
                     ((label[i+ncols+1] < empty) << 3) ) {
            /*      2  *
             *  <1>    *
             *   4  8  */
            case 3: /* Diagonally up */
                join2(i, i-ncols+1); break;
            case 5: /* Down */
                join2(i, i+ncols); break;
            case 7: /* Down and diagonally up */
                join3(i, i-ncols+1, i+ncols); break;
            case 9: /* Diagonally down */
                join2(i, i+ncols+1); break;
            case 11: /* Diagonally up and diagonally down */
                join3(i, i-ncols+1, i+ncols+1); break;
            case 13: /* Down and diagonally down */
                join3(i, i+ncols, i+ncols+1); break;
            case 15: /* Down, diagonally down, and diagonally up */
                join4(i, i-ncols+1, i+ncols, i+ncols+1); break;
            }
    }

    /* Flatten the labels, so that all cells belonging to the
       same cluster will have the same label. */
    {
        const size_t  imax = ncols*rows;
        size_t        i;
        for (i = 0; i < imax; i++)
            if (label[i] < empty) {
                size_t  k = i, kroot = i;

                while (kroot != label[kroot])
                    kroot = label[kroot];

                while (label[k] != kroot) {
                    const size_t  temp = label[k];
                    label[k] = kroot;
                    k = temp;
                }
            }
    }

    /* Count the number of occurrences of each label. */
    memset(occurs, 0, (empty + 1) * sizeof occurs[0]);
    {
        cluster_label *const end = label + ncols*rows;
        cluster_label       *ptr = label;

        while (ptr < end)
            ++occurs[*(ptr++)];
    }
}

#endif /* CLUSTER_H */
#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include "cluster.h"

/*
   This program is in the public domain.
   Written by Nominal Animal <question@nominal-animal.net>
*/

int main(int argc, char *argv[])
{
    uint64_t      *count = NULL;
    uint64_t       plimit;
    unsigned long  size, iterations, i;
    double         probability;
    char           dummy;

    if (argc != 4) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s SIZE ITERATIONS PROBABILITY\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "Where  SIZE         sets the matrix size (SIZE x SIZE),\n");
        fprintf(stderr, "       ITERATIONS   is the number of iterations, and\n");
        fprintf(stderr, "       PROBABILITY  is the probability [0, 1] of a cell\n");
        fprintf(stderr, "                    being non-empty.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) {
        fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]);
        return EXIT_FAILURE;
    }

    if (sscanf(argv[2], " %lu %c", &iterations, &dummy) != 1 || iterations < 1u) {
        fprintf(stderr, "%s: Invalid number of iterations.\n", argv[2]);
        return EXIT_FAILURE;
    }

    if (sscanf(argv[3], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) {
        fprintf(stderr, "%s: Invalid probability.\n", argv[3]);
        return EXIT_FAILURE;
    }

    /* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p),
       but doubles do not have the precision for that. */
    if (probability > 0.9999999999999999)
        plimit = UINT64_C(18446744073709551615);
    else
    if (probability <= 0)
        plimit = UINT64_C(0);
    else
        plimit = (uint64_t)(18446744073709551616.0 * probability);

    /* Try allocating memory for the matrix and the label count array. */
    if (size > 2147483646u || cluster_init(size, size)) {
        fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
        return EXIT_FAILURE;
    }

    /* Try allocating memory for the cluster size histogram. */
    count = calloc(sizes + 1, sizeof count[0]);
    if (!count) {
        fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
        return EXIT_FAILURE;
    }

    printf("# Xorshift64* seed is %" PRIu64 "\n", prng_randomize());
    printf("# Matrix size is %lu x %lu\n", size, size);
    printf("# Probability of a cell to be non-empty is %.6f%%\n",
           100.0 * ((double)plimit / 18446744073709551615.0));
    printf("# Collecting statistics over %lu iterations\n", iterations);
    printf("# Size Count CountPerIteration\n");
    fflush(stdout);

    for (i = 0u; i < iterations; i++) {
        size_t  k = empty;

        cluster_fill(plimit);

        /* Add cluster sizes to the histogram. */
        while (k-->0)
            ++count[occurs[k]];
    }

    /* Print the histogram of cluster sizes. */
    {
        size_t k = 1;

        printf("%zu %" PRIu64 " %.3f\n", k, count[k], (double)count[k] / (double)iterations);

        for (k = 2; k < sizes; k++)
            if (count[k-1] != 0 || count[k] != 0 || count[k+1] != 0)
                printf("%zu %" PRIu64 " %.3f\n", k, count[k], (double)count[k] / (double)iterations);
    }

    return EXIT_SUCCESS;
}
#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include "cluster.h"

/*
   This program is in the public domain.
   Written by Nominal Animal <question@nominal-animal.net>
*/

int main(int argc, char *argv[])
{
    uint64_t       plimit;
    uint32_t      *color;
    unsigned long  size, r, c;
    double         probability;
    char           dummy;

    if (argc != 3) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s SIZE PROBABILITY > matrix.ppm\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "Where  SIZE         sets the matrix size (SIZE x SIZE),\n");
        fprintf(stderr, "       PROBABILITY  is the probability [0, 1] of a cell\n");
        fprintf(stderr, "                    being non-empty.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) {
        fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]);
        return EXIT_FAILURE;
    }

    if (sscanf(argv[2], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) {
        fprintf(stderr, "%s: Invalid probability.\n", argv[2]);
        return EXIT_FAILURE;
    }

    /* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p),
       but doubles do not have the precision for that. */
    if (probability > 0.9999999999999999)
        plimit = UINT64_C(18446744073709551615);
    else
    if (probability <= 0)
        plimit = UINT64_C(0);
    else
        plimit = (uint64_t)(18446744073709551616.0 * probability);

    /* Try allocating memory for the matrix and the label count array. */
    if (size > 2147483646u || cluster_init(size, size)) {
        fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
        return EXIT_FAILURE;
    }

    /* Try allocating memory for the color lookup array. */
    color = calloc(empty + 1, sizeof color[0]);
    if (!color) {
        fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
        return EXIT_FAILURE;
    }

    fprintf(stderr, "Using Xorshift64* seed %" PRIu64 "\n", prng_randomize());
    fflush(stderr);

    /* Construct the matrix. */
    cluster_fill(plimit);

    {
        size_t  i;

        color[empty] = 0xFFFFFFu;

        /* Assign random colors. */
        for (i = 0; i < empty; i++)
            color[i] = prng_u64() >> 40;
    }

    printf("P6\n%lu %lu 255\n", size, size);
    for (r = 0; r < rows; r++)
        for (c = 0; c < cols; c++) {
            const uint32_t  rgb = color[label[ncols*r + c]];
            fputc((rgb >> 16) & 255, stdout);
            fputc((rgb >> 8)  & 255, stdout);
            fputc( rgb        & 255, stdout);
        }
    fflush(stdout);

    fprintf(stderr, "Done.\n");

    return EXIT_SUCCESS;
}
#ifndef   CLUSTERS_H
#define   CLUSTERS_H
#include <stdlib.h>
#include <inttypes.h>
#include <limits.h>
#include <time.h>

/* This file is in public domain. No guarantees, no warranties.
   Written by Nominal Animal <question@nominal-animal.net>.
*/

/* For pure C89 compilers, use '-DSTATIC_INLINE=static' at compile time. */
#ifndef  STATIC_INLINE
#define  STATIC_INLINE  static inline
#endif

#define  ERR_INVALID   -1   /* Invalid function parameter */
#define  ERR_TOOLARGE  -2   /* Matrix size is too large */
#define  ERR_NOMEM     -3   /* Out of memory */

typedef  unsigned char  cluster_color;

typedef  uint32_t  cluster_label;
typedef  uint64_t  cluster_count;

#define  CLUSTER_WHITE  0
#define  CLUSTER_BLACK  1
#define  CLUSTER_NONE   UCHAR_MAX   /* Reserved */

#define  FMT_COLOR  "u"
#define  FMT_LABEL  PRIu32
#define  FMT_COUNT  PRIu64


typedef struct {
    uint64_t        state;
} prng;

typedef struct {
    /* Pseudo-random number generator used */
    prng            rng;

    /* Actual size of the matrix */
    cluster_label   rows;
    cluster_label   cols;

    /* Number of matrices the histograms have been collected from */
    cluster_count   iterations;

    /* Probability of each cell being black */
    uint64_t        p_black;

    /* Probability of diagonal connections */
    uint64_t        d_black;
    uint64_t        d_white;

    /* Cluster colormap contains (rows+2) rows and (cols+1) columns */
    cluster_color  *map;

    /* Disjoint set of (rows) rows and (cols) columns */
    cluster_label  *djs;

    /* Number of occurrences per disjoint set root */
    cluster_label  *white_roots;
    cluster_label  *black_roots;

    /* Histograms of white and black clusters */
    cluster_count  *white_histogram;
    cluster_count  *black_histogram;
} cluster;
#define  CLUSTER_INITIALIZER  { {0}, 0, 0, 0, 0.0, 0.0, 0.0, NULL, NULL, NULL, NULL, NULL, NULL }

/* Calculate uint64_t limit corresponding to probability p. */
STATIC_INLINE uint64_t  probability_limit(const double p)
{
    if (p <= 0.0)
        return UINT64_C(0);
    else
    if (p <= 0.5)
        return (uint64_t)(p * 18446744073709551615.0);
    else
    if (p >= 1.0)
        return UINT64_C(18446744073709551615);
    else
        return UINT64_C(18446744073709551615) - (uint64_t)((double)(1.0 - p) * 18446744073709551615.0);
}

/* Return true at probability corresponding to limit 'limit'.
   This implements a Xorshift64* pseudo-random number generator. */
STATIC_INLINE int  probability(prng *const rng, const uint64_t limit)
{
    uint64_t  state = rng->state;
    uint64_t  value;

    /* To correctly cover the entire range, we ensure we never generate a zero. */
    do {
        state ^= state >> 12;
        state ^= state << 25;
        state ^= state >> 27;
        value  = state * UINT64_C(2685821657736338717);
    } while (!value);

    rng->state = state;

    return (value <= limit) ? CLUSTER_BLACK : CLUSTER_WHITE;
}

/* Generate a random seed for the Xorshift64* pseudo-random number generator. */
static uint64_t  randomize(prng *const rng)
{
    unsigned int  rounds = 127;
    uint64_t      state = UINT64_C(3069887672279) * (uint64_t)time(NULL)
                        ^ UINT64_C(60498839) * (uint64_t)clock();
    if (!state)
        state = 1;

    /* Churn the state a bit. */
    while (rounds-->0) {
        state ^= state >> 12;
        state ^= state << 25;
        state ^= state >> 27;
    }

    if (rng)
        rng->state = state;

    return state;
}

/* Free all resources related to a cluster. */
STATIC_INLINE void free_cluster(cluster *c)
{
    if (c) {
        /* Free dynamically allocated pointers. Note: free(NULL) is safe. */
        free(c->map);
        free(c->djs);
        free(c->white_roots);
        free(c->black_roots);
        free(c->white_histogram);
        free(c->black_histogram);
        c->rng.state       = 0;
        c->rows            = 0;
        c->cols            = 0;
        c->iterations      = 0;
        c->p_black         = 0;
        c->d_white         = 0;
        c->d_black         = 0;
        c->map             = NULL;
        c->djs             = NULL;
        c->white_roots     = 0;
        c->black_roots     = 0;
        c->white_histogram = NULL;
        c->black_histogram = NULL;
    }
}

/* Initialize cluster structure, for a matrix of specified size. */
static int init_cluster(cluster *c, const int rows, const int cols,
                        const double p_black,
                        const double d_white, const double d_black)
{
    const cluster_label  label_cols = cols;
    const cluster_label  label_rows = rows;
    const cluster_label  color_rows = rows + 2;
    const cluster_label  color_cols = cols + 1;
    const cluster_label  color_cells = color_rows * color_cols;
    const cluster_label  label_cells = label_rows * label_cols;
    const cluster_label  labels = label_cells + 2; /* One extra! */

    if (!c)
        return ERR_INVALID;

    c->rng.state       = 0; /* Invalid seed for Xorshift64*. */
    c->rows            = 0;
    c->cols            = 0;
    c->iterations      = 0;
    c->p_black         = 0;
    c->d_white         = 0;
    c->d_black         = 0;
    c->map             = NULL;
    c->djs             = NULL;
    c->white_roots     = NULL;
    c->black_roots     = NULL;
    c->white_histogram = NULL;
    c->black_histogram = NULL;

    if (rows < 1 || cols < 1)
        return ERR_INVALID;

    if ((unsigned int)color_rows <= (unsigned int)rows ||
        (unsigned int)color_cols <= (unsigned int)cols ||
        (cluster_label)(color_cells / color_rows) != color_cols ||
        (cluster_label)(color_cells / color_cols) != color_rows ||
        (cluster_label)(label_cells / label_rows) != label_cols ||
        (cluster_label)(label_cells / label_cols) != label_rows)
        return ERR_TOOLARGE;

    c->black_histogram = calloc(labels, sizeof (cluster_count));
    c->white_histogram = calloc(labels, sizeof (cluster_count));
    c->black_roots = calloc(labels, sizeof (cluster_label));
    c->white_roots = calloc(labels, sizeof (cluster_label));
    c->djs = calloc(label_cells, sizeof (cluster_label));
    c->map = calloc(color_cells, sizeof (cluster_color));
    if (!c->map || !c->djs ||
        !c->white_roots || !c->black_roots ||
        !c->white_histogram || !c->black_histogram) {
        free(c->map);
        free(c->djs);
        free(c->white_roots);
        free(c->black_roots);
        free(c->white_histogram);
        free(c->black_histogram);
        return ERR_NOMEM;
    }

    c->rows = rows;
    c->cols = cols;

    c->p_black = probability_limit(p_black);
    c->d_white = probability_limit(d_white);
    c->d_black = probability_limit(d_black);

    /* Initialize the color map to NONE. */
    {
        cluster_color        *ptr = c->map;
        cluster_color *const  end = c->map + color_cells;
        while (ptr < end)
            *(ptr++) = CLUSTER_NONE;
    }

    /* calloc() initialized the other arrays to zeros already. */
    return 0;
}

/* Disjoint set: find root. */
STATIC_INLINE cluster_label  djs_root(const cluster_label *const  djs, cluster_label  from)
{
    while (from != djs[from])
        from = djs[from];
    return from;
}

/* Disjoint set: path compression. */
STATIC_INLINE void  djs_path(cluster_label *const  djs, cluster_label  from, const cluster_label  root)
{
    while (from != root) {
        const cluster_label  temp = djs[from];
        djs[from] = root;
        from = temp;
    }
}

/* Disjoint set: Flatten. Returns the root, and flattens the path to it. */
STATIC_INLINE cluster_label  djs_flatten(cluster_label *const  djs, cluster_label  from)
{
    const cluster_label  root = djs_root(djs, from);
    djs_path(djs, from, root);
    return root;
}

/* Disjoint set: Join two subsets. */
STATIC_INLINE void  djs_join2(cluster_label *const  djs,
                              cluster_label  from1,  cluster_label  from2)
{
    cluster_label  root, temp;

    root = djs_root(djs, from1);

    temp = djs_root(djs, from2);
    if (root > temp)
        temp = root;

    djs_path(djs, from1, root);
    djs_path(djs, from2, root);
}

/* Disjoint set: Join three subsets. */
STATIC_INLINE void  djs_join3(cluster_label *const  djs,
                              cluster_label  from1, cluster_label  from2,
                              cluster_label  from3)
{
    cluster_label  root, temp;

    root = djs_root(djs, from1);

    temp = djs_root(djs, from2);
    if (root > temp)
        root = temp;

    temp = djs_root(djs, from3);
    if (root > temp)
        root = temp;

    djs_path(djs, from1, root);
    djs_path(djs, from2, root);
    djs_path(djs, from3, root);
}

/* Disjoint set: Join four subsets. */
STATIC_INLINE void  djs_join4(cluster_label *const  djs,
                              cluster_label  from1, cluster_label  from2,
                              cluster_label  from3, cluster_label  from4)
{
    cluster_label  root, temp;

    root = djs_root(djs, from1);

    temp = djs_root(djs, from2);
    if (root > temp)
        root = temp;

    temp = djs_root(djs, from3);
    if (root > temp)
        root = temp;

    temp = djs_root(djs, from4);
    if (root > temp)
        root = temp;

    djs_path(djs, from1, root);
    djs_path(djs, from2, root);
    djs_path(djs, from3, root);
    djs_path(djs, from4, root);
}

/* Disjoint set: Join five subsets. */
STATIC_INLINE void  djs_join5(cluster_label *const  djs,
                              cluster_label  from1, cluster_label  from2,
                              cluster_label  from3, cluster_label  from4,
                              cluster_label  from5)
{
    cluster_label  root, temp;

    root = djs_root(djs, from1);

    temp = djs_root(djs, from2);
    if (root > temp)
        root = temp;

    temp = djs_root(djs, from3);
    if (root > temp)
        root = temp;

    temp = djs_root(djs, from4);
    if (root > temp)
        root = temp;

    temp = djs_root(djs, from5);
    if (root > temp)
        root = temp;

    djs_path(djs, from1, root);
    djs_path(djs, from2, root);
    djs_path(djs, from3, root);
    djs_path(djs, from4, root);
    djs_path(djs, from5, root);
}

static void iterate(cluster *const cl)
{
    prng          *const  rng = &(cl->rng);
    uint64_t       const  p_black = cl->p_black;
    uint64_t              d_color[2];

    cluster_color *const  map = cl->map + cl->cols + 2;
    cluster_label  const  map_stride = cl->cols + 1;

    cluster_label *const  djs = cl->djs;

    cluster_label        *roots[2];

    cluster_label  const  rows = cl->rows;
    cluster_label  const  cols = cl->cols;

    int                   r, c;

    d_color[CLUSTER_WHITE] = cl->d_white;
    d_color[CLUSTER_BLACK] = cl->d_black;
    roots[CLUSTER_WHITE] = cl->white_roots;
    roots[CLUSTER_BLACK] = cl->black_roots;

    for (r = 0; r < rows; r++) {
        cluster_label  const  curr_i = r * cols;
        cluster_color *const  curr_row = map + r * map_stride;
        cluster_color *const  prev_row = curr_row - map_stride;

        for (c = 0; c < cols; c++) {
            cluster_color  color = probability(rng, p_black);
            cluster_label  label = curr_i + c;
            uint64_t       diag  = d_color[color];
            unsigned int   joins = 0;

            /* Assign the label and color of the current cell, */
            djs[label] = label;
            curr_row[c] = color;

            /* Because we join left, up-left, up, and up-right, and
               all those have been assigned to already, we can do
               the necessary joins right now. */

            /* Join left? */
            joins |= (curr_row[c-1] == color) << 0;

            /* Join up? */
            joins |= (prev_row[c  ] == color) << 1;

            /* Join up left? */
            joins |= (prev_row[c-1] == color && probability(rng, diag)) << 2;

            /* Join up right? */
            joins |= (prev_row[c+1] == color && probability(rng, diag)) << 3;

            /* Do the corresponding joins. */
            switch (joins) {
            case 1: /* Left */
                djs_join2(djs, label, label - 1);
                break;
            case 2: /* Up */
                djs_join2(djs, label, label - cols);
                break;
            case 3: /* Left and up */
                djs_join3(djs, label, label - 1, label - cols);
                break;
            case 4: /* Up-left */
                djs_join2(djs, label, label - cols - 1);
                break;
            case 5: /* Left and up-left */
                djs_join3(djs, label, label - 1, label - cols - 1);
                break;
            case 6: /* Up and up-left */
                djs_join3(djs, label, label - cols, label - cols - 1);
                break;
            case 7: /* Left, up, and up-left */
                djs_join4(djs, label, label - 1, label - cols, label - cols - 1);
                break;
            case 8: /* Up-right */
                djs_join2(djs, label, label - cols + 1);
                break;
            case 9: /* Left and up-right */
                djs_join3(djs, label, label - 1, label - cols + 1);
                break;
            case 10: /* Up and up-right */
                djs_join3(djs, label, label - cols, label - cols + 1);
                break;
            case 11: /* Left, up, and up-right */
                djs_join4(djs, label, label - 1, label - cols, label - cols + 1);
                break;
            case 12: /* Up-left and up-right */
                djs_join3(djs, label, label - cols - 1, label - cols + 1);
                break;
            case 13: /* Left, up-left, and up-right */
                djs_join4(djs, label, label - 1, label - cols - 1, label - cols + 1);
                break;
            case 14: /* Up, up-left, and up-right */
                djs_join4(djs, label, label - cols, label - cols - 1, label - cols + 1);
                break;
            case 15: /* Left, up, up-left, and up-right */
                djs_join5(djs, label, label - 1, label - cols, label - cols - 1, label - cols + 1);
                break;
            }
        }
    }

    /* Count the occurrences of each disjoint-set root label. */
    if (roots[0] && roots[1]) {
        const size_t  labels = rows * cols + 2;

        /* Clear the counts. */
        memset(roots[0], 0, labels * sizeof (cluster_label));
        memset(roots[1], 0, labels * sizeof (cluster_label));

        for (r = 0; r < rows; r++) {
            const cluster_color *const  curr_row = map + r * map_stride;
            const cluster_label         curr_i   = r * cols;
            for (c = 0; c < cols; c++)
                roots[curr_row[c]][djs_flatten(djs, curr_i + c)]++;
        }
    } else {
        size_t  i = rows * cols;
        while (i-->0)
            djs_flatten(djs, i);
    }

    /* Collect the statistics. */
    if (cl->white_histogram && roots[0]) {
        const cluster_label *const  root_count = roots[0];
        cluster_count *const        histogram = cl->white_histogram;
        size_t                      i = rows * cols + 1;
        while (i-->0)
            histogram[root_count[i]]++;
    }
    if (cl->black_histogram && roots[1]) {
        const cluster_label *const  root_count = roots[1];
        cluster_count *const        histogram = cl->black_histogram;
        size_t                      i = rows * cols + 1;
        while (i-->0)
            histogram[root_count[i]]++;
    }

    /* Note: index zero and (rows*cols+1) are zero in the histogram, for ease of scanning. */
    if (cl->white_histogram || cl->black_histogram) {
        const size_t  n = rows * cols + 1;
        if (cl->white_histogram) {
            cl->white_histogram[0] = 0;
            cl->white_histogram[n] = 0;
        }
        if (cl->black_histogram) {
            cl->black_histogram[0] = 0;
            cl->black_histogram[n] = 0;
        }
    }

    /* One more iteration performed. */
    cl->iterations++;
}

#endif /* CLUSTERS_H */
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include "clusters.h"

/* This file is in public domain. No guarantees, no warranties.
   Written by Nominal Animal <question@nominal-animal.net>.
*/

#define  DEFAULT_ROWS     100
#define  DEFAULT_COLS     100
#define  DEFAULT_P_BLACK  0.0
#define  DEFAULT_D_WHITE  0.0
#define  DEFAULT_D_BLACK  0.0
#define  DEFAULT_ITERS    1

int usage(const char *argv0)
{
    fprintf(stderr, "\n");
    fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
    fprintf(stderr, "       %s OPTIONS [ > output.txt ]\n", argv0);
    fprintf(stderr, "\n");
    fprintf(stderr, "Options:\n");
    fprintf(stderr, "       rows=SIZE   Set number of rows. Default is %d.\n", DEFAULT_ROWS);
    fprintf(stderr, "       cols=SIZE   Set number of columns. Default is %d.\n", DEFAULT_ROWS);
    fprintf(stderr, "       L=SIZE      Set rows=SIZE and cols=SIZE.\n");
    fprintf(stderr, "       black=P     Set the probability of a cell to be black. Default is %g.\n", DEFAULT_P_BLACK);
    fprintf(stderr, "                   All non-black cells are white.\n");
    fprintf(stderr, "       dwhite=P    Set the probability of white cells connecting diagonally.\n");
    fprintf(stderr, "                   Default is %g.\n", DEFAULT_D_WHITE);
    fprintf(stderr, "       dblack=P    Set the probability of black cells connecting diagonally.\n");
    fprintf(stderr, "                   Default is %g.\n", DEFAULT_D_BLACK);
    fprintf(stderr, "       N=COUNT     Number of iterations for gathering statistics. Default is %d.\n", DEFAULT_ITERS);
    fprintf(stderr, "       seed=U64    Set the Xorshift64* pseudorandom number generator seed; nonzero.\n");
    fprintf(stderr, "                   Default is to pick one randomly (based on time).\n");
    fprintf(stderr, "\n");
    fprintf(stderr, "The output consists of comment lines and data lines.\n");
    fprintf(stderr, "Comment lines begin with a #:\n");
    fprintf(stderr, "   # This is a comment line.\n");
    fprintf(stderr, "Each data line contains a cluster size, the number of white clusters of that size\n");
    fprintf(stderr, "observed during iterations, the number of black clusters of that size observed\n");
    fprintf(stderr, "during iterations, and the number of any clusters of that size observed:\n");
    fprintf(stderr, "   SIZE  WHITE_CLUSTERS  BLACK_CLUSTERS  TOTAL_CLUSTERS\n");
    fprintf(stderr, "\n");
    return EXIT_SUCCESS;
}

int main(int argc, char *argv[])
{
    int      rows = DEFAULT_ROWS;
    int      cols = DEFAULT_COLS;
    double   p_black = DEFAULT_P_BLACK;
    double   d_white = DEFAULT_D_WHITE;
    double   d_black = DEFAULT_D_BLACK;
    long     iters = DEFAULT_ITERS;
    uint64_t seed = 0;
    cluster  c = CLUSTER_INITIALIZER;

    int      arg, itemp;
    uint64_t u64temp;
    double   dtemp;
    long     ltemp;
    char     dummy;

    size_t   n;
    size_t   i;

    if (argc < 2)
        return usage(argv[0]);

    for (arg = 1; arg < argc; arg++)
        if (!strcmp(argv[arg], "-h") || !strcmp(argv[arg], "/?") || !strcmp(argv[arg], "--help"))
            return usage(argv[0]);
        else
        if (sscanf(argv[arg], "L=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "l=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "size=%d %c", &itemp, &dummy) == 1) {
            rows = itemp;
            cols = itemp;
        } else
        if (sscanf(argv[arg], "seed=%" SCNu64 " %c", &u64temp, &dummy) == 1 ||
            sscanf(argv[arg], "seed=%" SCNx64 " %c", &u64temp, &dummy) == 1 ||
            sscanf(argv[arg], "s=%" SCNu64 " %c", &u64temp, &dummy) == 1 ||
            sscanf(argv[arg], "s=%" SCNx64 " %c", &u64temp, &dummy) == 1) {
            seed = u64temp;
        } else
        if (sscanf(argv[arg], "N=%ld %c", &ltemp, &dummy) == 1 ||
            sscanf(argv[arg], "n=%ld %c", &ltemp, &dummy) == 1 ||
            sscanf(argv[arg], "count=%ld %c", &ltemp, &dummy) == 1) {
            iters = ltemp;
        } else
        if (sscanf(argv[arg], "rows=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "r=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "height=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "h=%d %c", &itemp, &dummy) == 1) {
            rows = itemp;
        } else
        if (sscanf(argv[arg], "columns=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "cols=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "c=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "width=%d %c", &itemp, &dummy) == 1 ||
            sscanf(argv[arg], "w=%d %c", &itemp, &dummy) == 1) {
            cols = itemp;
        } else
        if (sscanf(argv[arg], "black=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "p0=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "b=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "P=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "p0=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "p=%lf %c", &dtemp, &dummy) == 1) {
            p_black = dtemp;
        } else
        if (sscanf(argv[arg], "white=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "p1=%lf %c", &dtemp, &dummy) == 1) {
            p_black = 1.0 - dtemp;
        } else
        if (sscanf(argv[arg], "dwhite=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "dw=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "d0=%lf %c", &dtemp, &dummy) == 1) {
            d_white = dtemp;
        } else
        if (sscanf(argv[arg], "dblack=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "db=%lf %c", &dtemp, &dummy) == 1 ||
            sscanf(argv[arg], "d1=%lf %c", &dtemp, &dummy) == 1) {
            d_black = dtemp;
        } else {
            fprintf(stderr, "%s: Unknown option.\n", argv[arg]);
            return EXIT_FAILURE;
        }

    switch (init_cluster(&c, rows, cols, p_black, d_white, d_black)) {
    case 0: break; /* OK */
    case ERR_INVALID:
        fprintf(stderr, "Invalid size.\n");
        return EXIT_FAILURE;
    case ERR_TOOLARGE:
        fprintf(stderr, "Size is too large.\n");
        return EXIT_FAILURE;
    case ERR_NOMEM:
        fprintf(stderr, "Not enough memory.\n");
        return EXIT_FAILURE;
    }

    if (!seed)
        seed = randomize(NULL);

    c.rng.state = seed;

    /* The largest possible cluster has n cells. */
    n = (size_t)rows * (size_t)cols;

    /* Print the comments describing the initial parameters. */
    printf("# seed: %" PRIu64 " (Xorshift 64*)\n", seed);
    printf("# size: %d rows, %d columns\n", rows, cols);
    printf("# P(black): %.6f (%" PRIu64 "/18446744073709551615)\n", p_black, c.p_black);
    printf("# P(black connected diagonally): %.6f (%" PRIu64 "/18446744073709551615)\n", d_black, c.d_black);
    printf("# P(white connected diagonally): %.6f (%" PRIu64 "/18446744073709551615)\n", d_white, c.d_white);
    fflush(stdout);

    while (iters-->0)
        iterate(&c);

    printf("# Iterations: %" PRIu64 "\n", c.iterations);
    printf("#\n");
    printf("# size  white_clusters(size) black_clusters(size) clusters(size)\n");

    /* Note: c._histogram[0] == c._histogram[n] == 0, for ease of scanning. */
    for (i = 1; i <= n; i++)
        if (c.white_histogram[i-1] || c.white_histogram[i] || c.white_histogram[i+1] ||
            c.black_histogram[i-1] || c.black_histogram[i] || c.black_histogram[i+1])
            printf("%lu %" FMT_COUNT " %" FMT_COUNT " %" FMT_COUNT "\n",
                   (unsigned long)i,
                   c.white_histogram[i],
                   c.black_histogram[i],
                   c.white_histogram[i]+c.black_histogram[i]);

    /* Since we are exiting anyway, this is not really necessary. */
    free_cluster(&c);

    /* All done. */
    return EXIT_SUCCESS;
}