C 不必声明一个巨大的2D数组,找到平均大小分布的有效方法是什么?
考虑一个lxlsize矩阵M,其条目可以是0或1。每个元素都是概率为p的1和概率为1-p的0。我将标记为1的元素称为黑色元素,标记为0的元素称为白色元素。我正在尝试编写一个代码:C 不必声明一个巨大的2D数组,找到平均大小分布的有效方法是什么?,c,arrays,multidimensional-array,average,C,Arrays,Multidimensional Array,Average,考虑一个lxlsize矩阵M,其条目可以是0或1。每个元素都是概率为p的1和概率为1-p的0。我将标记为1的元素称为黑色元素,标记为0的元素称为白色元素。我正在尝试编写一个代码: 生成包含条目0和1的随机矩阵。我需要输入矩阵L的大小和概率p 用相同的编号标记属于同一簇的所有黑色元素。(将黑色元素簇定义为单元格图中的最大连接组件,值为1,其中边连接行和列最多相差1的单元格(因此每个单元格最多有八个相邻单元格)换言之,如果矩阵中的两个黑色元素共享一个边或顶点,则认为它们属于同一个黑色簇。也就是说,
- 生成包含条目0和1的随机矩阵。我需要输入矩阵L的大小和概率p
- 用相同的编号标记属于同一簇的所有黑色元素。(将黑色元素簇定义为单元格图中的最大连接组件,值为1,其中边连接行和列最多相差1的单元格(因此每个单元格最多有八个相邻单元格)换言之,如果矩阵中的两个黑色元素共享一个边或顶点,则认为它们属于同一个黑色簇。也就是说,把矩阵看成是一个大的方块,而元素则是大方格中的小方格。)
- 在一个从p=0到p=100的循环中(我指的是概率p的百分比):每个大小的黑色簇的数量被写入一个与p值对应的文件中
#include "stdafx.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <string.h>
int *labels;
int n_labels = 0;
int uf_find(int x) {
int y = x;
while (labels[y] != y)
y = labels[y];
while (labels[x] != x) {
int z = labels[x];
labels[x] = y;
x = z;
}
return y;
}
int uf_union(int x, int y) {
return labels[uf_find(x)] = uf_find(y);
}
int uf_make_set(void) {
labels[0] ++;
assert(labels[0] < n_labels);
labels[labels[0]] = labels[0];
return labels[0];
}
void uf_initialize(int max_labels) {
n_labels = max_labels;
labels = (int*)calloc(sizeof(int), n_labels);
labels[0] = 0;
}
void uf_done(void) {
free(labels);
labels = 0;
}
#define max(a,b) (a>b?a:b)
#define min(a,b) (a>b?b:a)
int hoshen_kopelman(int **matrix, int m, int n) {
uf_initialize(m * n / 2);
for (int i = 0; i<m; i++)
for (int j = 0; j<n; j++)
if (matrix[i][j]) {
int up = (i == 0 ? 0 : matrix[i - 1][j]);
int left = (j == 0 ? 0 : matrix[i][j - 1]);
switch (!!up + !!left) {
case 0:
matrix[i][j] = uf_make_set();
break;
case 1:
matrix[i][j] = max(up, left);
break;
case 2:
matrix[i][j] = uf_union(up, left);
break;
}
int north_west;
if (i == 0 || j == 0)
north_west = 0;
else
north_west = matrix[i - 1][j - 1];
int north_east;
if (i == 0 || j == (n - 1))
north_east = 0;
else
north_east = matrix[i - 1][j + 1];
if (!!north_west == 1)
{
if (i != 0 && j != 0)
{
//if (rand() % 2 == 1)
//{
if (matrix[i][j - 1] == 0 && matrix[i - 1][j] == 0)
{
if (!!matrix[i][j] == 0)
matrix[i][j] = north_west;
else
uf_union(north_west, matrix[i][j]);
}
//}
}
else if (i == 0 || j == 0)
{
}
}
if (!!north_east == 1)
{
if (i != 0 && j != n - 1)
{
//if (rand() % 2 == 1)
//{
if (matrix[i - 1][j] == 0 && matrix[i][j + 1] == 0)
{
if (!!matrix[i][j] == 0)
matrix[i][j] = north_east;
else
uf_union(north_east, matrix[i][j]);
}
//}
}
else if (i == 0 || j == n - 1)
{
}
}
}
int *new_labels = (int*)calloc(sizeof(int), n_labels);
for (int i = 0; i<m; i++)
for (int j = 0; j<n; j++)
if (matrix[i][j]) {
int x = uf_find(matrix[i][j]);
if (new_labels[x] == 0) {
new_labels[0]++;
new_labels[x] = new_labels[0];
}
matrix[i][j] = new_labels[x];
}
int total_clusters = new_labels[0];
free(new_labels);
uf_done();
return total_clusters;
}
void check_labelling(int **matrix, int m, int n) {
int N, S, E, W;
for (int i = 0; i<m; i++)
for (int j = 0; j<n; j++)
if (matrix[i][j]) {
N = (i == 0 ? 0 : matrix[i - 1][j]);
S = (i == m - 1 ? 0 : matrix[i + 1][j]);
E = (j == n - 1 ? 0 : matrix[i][j + 1]);
W = (j == 0 ? 0 : matrix[i][j - 1]);
assert(N == 0 || matrix[i][j] == N);
assert(S == 0 || matrix[i][j] == S);
assert(E == 0 || matrix[i][j] == E);
assert(W == 0 || matrix[i][j] == W);
}
}
int cluster_count(int **matrix, int size, int N)
{
int i;
int j;
int count = 0;
for (i = 0; i < size; i++)
{
for (j = 0; j < size; j++)
{
if (matrix[i][j] == N)
count++;
}
}
return count;
}
int main(int argc, char **argv)
{
srand((unsigned int)time(0));
int p = 0;
printf("Enter number of rows/columns: ");
int size = 0;
scanf("%d", &size);
printf("\n");
FILE *fp;
printf("Enter number of averaging iterations: ");
int iterations = 0;
scanf("%d", &iterations);
for (int p = 0; p <= 100; p++)
{
char str[100];
sprintf(str, "BlackSizeDistribution%03i.txt", p);
fp = fopen(str, "w");
int **matrix;
matrix = (int**)calloc(10, sizeof(int*));
int** matrix_new = (int**)calloc(10, sizeof(int*));
matrix_new = (int **)realloc(matrix, sizeof(int*) * size);
matrix = matrix_new;
for (int i = 0; i < size; i++)
{
matrix[i] = (int *)calloc(size, sizeof(int));
for (int j = 0; j < size; j++)
{
int z = rand() % 100;
z = z + 1;
if (p == 0)
matrix[i][j] = 0;
if (z <= p)
matrix[i][j] = 1;
else
matrix[i][j] = 0;
}
}
hoshen_kopelman(matrix, size, size);
int highest = matrix[0][0];
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
if (highest < matrix[i][j])
highest = matrix[i][j];
int* counter = (int*)calloc(sizeof(int*), highest + 1);
int high = 0;
for (int k = 1; k <= highest; k++)
{
counter[k] = cluster_count(matrix, size, k);
if (counter[k] > high)
high = counter[k];
}
int* size_distribution = (int*)calloc(sizeof(int*), high + 1);
for (int y = 1; y <= high; y++)
{
int count = 0;
for (int z = 1; z <= highest; z++)
if (counter[z] == y)
count++;
size_distribution[y] = count;
}
for (int k = 1; k <= high; k++)
{
fprintf(fp, "\n%d\t%d", k, size_distribution[k]);
printf("\n%d\t%d", k, size_distribution[k]);
}
printf("\n");
check_labelling(matrix, size, size);
for (int i = 0; i < size; i++)
free(matrix[i]);
free(matrix);
}
}
我想做一些类似的事情,只是我需要在50或100次迭代中进行平均,以获得我的数学模型的准确值。顺便说一下,我并不需要为每个迭代输出值,即迭代1、迭代2、迭代3。。。在数据文件中。我只需要“打印”上表中的第一列和最后一列(具有平均值)
现在,我可能需要修改主函数来进行平均
以下是主要功能:
int main(int argc, char **argv)
{
srand((unsigned int)time(0));
int p = 0;
printf("Enter number of rows/columns: ");
int size = 0;
scanf("%d", &size);
printf("\n");
FILE *fp;
printf("Enter number of averaging iterations: ");
int iterations = 0;
scanf("%d", &iterations);
for (int p = 0; p <= 100; p++)
{
char str[100];
sprintf(str, "BlackSizeDistribution%03i.txt", p);
fp = fopen(str, "w");
int **matrix;
matrix = (int**)calloc(10, sizeof(int*));
int** matrix_new = (int**)calloc(10, sizeof(int*));
matrix_new = (int **)realloc(matrix, sizeof(int*) * size);
matrix = matrix_new;
for (int i = 0; i < size; i++)
{
matrix[i] = (int *)calloc(size, sizeof(int));
for (int j = 0; j < size; j++)
{
int z = rand() % 100;
z = z + 1;
if (p == 0)
matrix[i][j] = 0;
if (z <= p)
matrix[i][j] = 1;
else
matrix[i][j] = 0;
}
}
hoshen_kopelman(matrix, size, size);
int highest = matrix[0][0];
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
if (highest < matrix[i][j])
highest = matrix[i][j];
int* counter = (int*)calloc(sizeof(int*), highest + 1);
int high = 0;
for (int k = 1; k <= highest; k++)
{
counter[k] = cluster_count(matrix, size, k);
if (counter[k] > high)
high = counter[k];
}
int* size_distribution = (int*)calloc(sizeof(int*), high + 1);
for (int y = 1; y <= high; y++)
{
int count = 0;
for (int z = 1; z <= highest; z++)
if (counter[z] == y)
count++;
size_distribution[y] = count;
}
for (int k = 1; k <= high; k++)
{
fprintf(fp, "\n%d\t%d", k, size_distribution[k]);
printf("\n%d\t%d", k, size_distribution[k]);
}
printf("\n");
check_labelling(matrix, size, size);
for (int i = 0; i < size; i++)
free(matrix[i]);
free(matrix);
}
}
int main(int argc,char**argv)
{
srand((无符号整数)时间(0));
int p=0;
printf(“输入行/列数:”);
int size=0;
scanf(“%d”,大小(&S);
printf(“\n”);
文件*fp;
printf(“输入平均迭代次数:”);
int迭代次数=0;
scanf(“%d”,迭代次数(&d));
因为(intp=0;p啊,一个非常贴近我内心的话题!)
因为您正在收集统计信息,并且确切的配置仅在收集所述统计信息的持续时间内才有意义,所以您只需要两个L×L矩阵:一个用于保存颜色信息(一个白色,0到L×L黑色,因此一个类型可以保存0到L2+1之间的整数,包括0和L2+1);另一个用于保存该类型元素的数量(0和L2之间的整数,包括0和L2)
要在多次迭代中为每个L和p值保存每个大小的黑色簇的数量,您需要第三个L×L+1元素数组;但请注意,如果使用相同的L和p进行N次迭代,其值可以达到N×L×L
标准C伪随机数生成器非常糟糕;不要使用它。请使用或我最喜欢的(或相关的Xorshift1024*)。虽然Xorshift64*非常快,但它的分布对于像您这样的模拟来说足够“随机”
与其只存储“黑色”或“白色”,不如存储“无趣”或群集ID;最初,所有(未连接的)黑色都有一个唯一的群集ID。这样,您就可以实现一个群集,并将连接的单元格高效地连接到群集
下面是我将如何用伪代码实现这一点:
Let id[L*L] be the color information; index is L*row+col
Let idcount[L*L+1] be the id count over one iteration
Let count[L*L+1] be the number of clusters across N iterations
Let limit = (p / 100.0) * PRNG_MAX
Let white = L*L (and blacks are 0 to L*L-1, inclusive)
Clear count[] to all zeros
For iter = 1 to N, inclusive:
For row = 0 to L-1, inclusive:
If PRNG() <= limit:
Let id[L*row] = L*row
Else:
Let id[L*row] = white
End If
For col = 1 to L-1, inclusive:
If PRNG() <= limit:
If id[L*row+col-1] == white:
Let id[L*row+col] = L*row + col
Else:
Let id[L*row+col] = id[L*row+col-1]
End If
Else:
Let id[L*row+col] = white
End If
End For
End For
当然,您应该组合循环,以保持尽可能好的访问位置。(分别在一个单独的内部循环中执行col==0
,并将row==0
,最小化if
子句,因为它们往往比较慢。)您甚至可以创建id
数组(L+1)尺寸为2,用白色填充单元格的外边缘,以简化上述连接,使其成为一个单双环,成本为4L+4个额外使用的单元格
此时,您需要展平每个不相交的集合。每个非白色的ID值都是L*row+col
(意思是“this”),或者是对另一个单元格的引用。展平意味着对于每个ID,我们在链中找到最终ID,并使用它:
For i = 0 to L*L-1, inclusive:
If (id[i] != white):
Let k = i
While (id[k] != k):
k = id[k]
End While
id[i] = k
End If
End For
接下来,我们需要计算具有特定ID的单元数:
Clear idn[]
For i = 0 to L*L-1, inclusive:
Increment idn[id[i]]
End For
因为我们对N次迭代中特定大小的集群数量感兴趣,所以我们可以简单地通过在该迭代中找到的每个特定大小的集群更新计数
数组:
For i = 1 to L*L, inclusive:
Increment count[idn[i]]
End For
Let count[0] = 0
End For
此时,count[i]
包含在N
迭代中发现的i
单元格的黑色簇数;换句话说,它是在N次迭代中看到的簇大小的直方图(离散分布)
上述措施的一种实施方式如下
(第一个实现有数组大小和簇标签的问题;这个版本将这个部分拆分成一个单独的文件,并且更容易验证正确性。但是,这只是第二个版本,所以我不认为它是生产质量)。
首先,操作矩阵的函数,cluster.h:
#ifndef CLUSTER_H
#define CLUSTER_H
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
/*
This is in the public domain.
Written by Nominal Animal <question@nominal-animal.net>
*/
typedef uint32_t cluster_label;
typedef uint32_t cluster_size;
static size_t rows = 0; /* Number of mutable rows */
static size_t cols = 0; /* Number of mutable columns */
static size_t nrows = 0; /* Number of accessible rows == rows + 1 */
static size_t ncols = 0; /* Number of accessible columns == cols + 1 */
static size_t cells = 0; /* Total number of cells == nrows*ncols */
static size_t empty = 0; /* Highest-numbered label == nrows*ncols - 1 */
static size_t sizes = 0; /* Number of possible cluster sizes == rows*cols + 1 */
#define INDEX(row, col) (ncols*(row) + (col))
cluster_label *label = NULL; /* 2D contents: label[ncols*(row) + (col)] */
cluster_size *occurs = NULL; /* Number of occurrences of each label value */
/* Xorshift64* PRNG used for generating the matrix.
*/
static uint64_t prng_state = 1;
static inline uint64_t prng_u64(void)
{
uint64_t state = prng_state;
state ^= state >> 12;
state ^= state << 25;
state ^= state >> 27;
prng_state = state;
return state * UINT64_C(2685821657736338717);
}
static inline uint64_t prng_u64_less(void)
{
uint64_t result;
do {
result = prng_u64();
} while (result == UINT64_C(18446744073709551615));
return result;
}
static inline uint64_t prng_seed(const uint64_t seed)
{
if (seed)
prng_state = seed;
else
prng_state = 1;
return prng_state;
}
static inline uint64_t prng_randomize(void)
{
int discard = 1024;
uint64_t seed;
FILE *in;
/* By default, use time. */
prng_seed( ((uint64_t)time(NULL) * 2832631) ^
((uint64_t)clock() * 1120939) );
#ifdef __linux__
/* On Linux, read from /dev/urandom. */
in = fopen("/dev/urandom", "r");
if (in) {
if (fread(&seed, sizeof seed, 1, in) == 1)
prng_seed(seed);
fclose(in);
}
#endif
/* Churn the state a bit. */
seed = prng_state;
while (discard-->0) {
seed ^= seed >> 12;
seed ^= seed << 25;
seed ^= seed >> 27;
}
return prng_state = seed;
}
static inline void cluster_free(void)
{
free(occurs); occurs = NULL;
free(label); label = NULL;
rows = 0;
cols = 0;
nrows = 0;
ncols = 0;
cells = 0;
empty = 0;
sizes = 0;
}
static int cluster_init(const size_t want_cols, const size_t want_rows)
{
cluster_free();
if (want_cols < 1 || want_rows < 1)
return -1; /* Invalid number of rows or columns */
rows = want_rows;
cols = want_cols;
nrows = rows + 1;
ncols = cols + 1;
cells = nrows * ncols;
if ((size_t)(cells / nrows) != ncols ||
nrows <= rows || ncols <= cols)
return -1; /* Size overflows */
empty = nrows * ncols - 1;
sizes = rows * cols + 1;
label = calloc(cells, sizeof label[0]);
occurs = calloc(cells, sizeof occurs[0]);
if (!label || !occurs) {
cluster_free();
return -1; /* Not enough memory. */
}
return 0;
}
static void join2(size_t i1, size_t i2)
{
size_t root1 = i1, root2 = i2, root;
while (root1 != label[root1])
root1 = label[root1];
while (root2 != label[root2])
root2 = label[root2];
root = root1;
if (root > root2)
root = root2;
while (label[i1] != root) {
const size_t temp = label[i1];
label[i1] = root;
i1 = temp;
}
while (label[i2] != root) {
const size_t temp = label[i2];
label[i2] = root;
i2 = temp;
}
}
static void join3(size_t i1, size_t i2, size_t i3)
{
size_t root1 = i1, root2 = i2, root3 = i3, root;
while (root1 != label[root1])
root1 = label[root1];
while (root2 != label[root2])
root2 = label[root2];
while (root3 != label[root3])
root3 = label[root3];
root = root1;
if (root > root2)
root = root2;
if (root > root3)
root = root3;
while (label[i1] != root) {
const size_t temp = label[i1];
label[i1] = root;
i1 = temp;
}
while (label[i2] != root) {
const size_t temp = label[i2];
label[i2] = root;
i2 = temp;
}
while (label[i3] != root) {
const size_t temp = label[i3];
label[i3] = root;
i3 = temp;
}
}
static void join4(size_t i1, size_t i2, size_t i3, size_t i4)
{
size_t root1 = i1, root2 = i2, root3 = i3, root4 = i4, root;
while (root1 != label[root1])
root1 = label[root1];
while (root2 != label[root2])
root2 = label[root2];
while (root3 != label[root3])
root3 = label[root3];
while (root4 != label[root4])
root4 = label[root4];
root = root1;
if (root > root2)
root = root2;
if (root > root3)
root = root3;
if (root > root4)
root = root4;
while (label[i1] != root) {
const size_t temp = label[i1];
label[i1] = root;
i1 = temp;
}
while (label[i2] != root) {
const size_t temp = label[i2];
label[i2] = root;
i2 = temp;
}
while (label[i3] != root) {
const size_t temp = label[i3];
label[i3] = root;
i3 = temp;
}
while (label[i4] != root) {
const size_t temp = label[i4];
label[i4] = root;
i4 = temp;
}
}
static void cluster_fill(const uint64_t plimit)
{
size_t r, c;
/* Fill the matrix with the specified probability.
For efficiency, we use the same label for all
horizontally consecutive cells.
*/
for (r = 0; r < rows; r++) {
const size_t imax = ncols*r + cols;
cluster_label last;
size_t i = ncols*r;
last = (prng_u64_less() < plimit) ? i : empty;
label[i] = last;
while (++i < imax) {
if (prng_u64_less() < plimit) {
if (last == empty)
last = i;
} else
last = empty;
label[i] = last;
}
label[imax] = empty;
}
/* Fill the extra row with empty, too. */
{
cluster_label *ptr = label + ncols*rows;
cluster_label *const end = label + ncols*nrows;
while (ptr < end)
*(ptr++) = empty;
}
/* On the very first row, we need to join non-empty cells
vertically and diagonally down. */
for (c = 0; c < cols; c++)
switch ( ((label[c] < empty) << 0) |
((label[c+ncols] < empty) << 1) |
((label[c+ncols+1] < empty) << 2) ) {
/* <1> *
* 2 4 */
case 3: /* Join down */
join2(c, c+ncols); break;
case 5: /* Join down right */
join2(c, c+ncols+1); break;
case 7: /* Join down and down right */
join3(c, c+ncols, c+ncols+1); break;
}
/* On the other rows, we need to join non-empty cells
vertically, diagonally down, and diagonally up. */
for (r = 1; r < rows; r++) {
const size_t imax = ncols*r + cols;
size_t i;
for (i = ncols*r; i < imax; i++)
switch ( ((label[i] < empty) << 0) |
((label[i-ncols+1] < empty) << 1) |
((label[i+ncols] < empty) << 2) |
((label[i+ncols+1] < empty) << 3) ) {
/* 2 *
* <1> *
* 4 8 */
case 3: /* Diagonally up */
join2(i, i-ncols+1); break;
case 5: /* Down */
join2(i, i+ncols); break;
case 7: /* Down and diagonally up */
join3(i, i-ncols+1, i+ncols); break;
case 9: /* Diagonally down */
join2(i, i+ncols+1); break;
case 11: /* Diagonally up and diagonally down */
join3(i, i-ncols+1, i+ncols+1); break;
case 13: /* Down and diagonally down */
join3(i, i+ncols, i+ncols+1); break;
case 15: /* Down, diagonally down, and diagonally up */
join4(i, i-ncols+1, i+ncols, i+ncols+1); break;
}
}
/* Flatten the labels, so that all cells belonging to the
same cluster will have the same label. */
{
const size_t imax = ncols*rows;
size_t i;
for (i = 0; i < imax; i++)
if (label[i] < empty) {
size_t k = i, kroot = i;
while (kroot != label[kroot])
kroot = label[kroot];
while (label[k] != kroot) {
const size_t temp = label[k];
label[k] = kroot;
k = temp;
}
}
}
/* Count the number of occurrences of each label. */
memset(occurs, 0, (empty + 1) * sizeof occurs[0]);
{
cluster_label *const end = label + ncols*rows;
cluster_label *ptr = label;
while (ptr < end)
++occurs[*(ptr++)];
}
}
#endif /* CLUSTER_H */
#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include "cluster.h"
/*
This program is in the public domain.
Written by Nominal Animal <question@nominal-animal.net>
*/
int main(int argc, char *argv[])
{
uint64_t plimit;
uint32_t *color;
unsigned long size, r, c;
double probability;
char dummy;
if (argc != 3) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s SIZE PROBABILITY > matrix.ppm\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "Where SIZE sets the matrix size (SIZE x SIZE),\n");
fprintf(stderr, " PROBABILITY is the probability [0, 1] of a cell\n");
fprintf(stderr, " being non-empty.\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) {
fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]);
return EXIT_FAILURE;
}
if (sscanf(argv[2], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) {
fprintf(stderr, "%s: Invalid probability.\n", argv[2]);
return EXIT_FAILURE;
}
/* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p),
but doubles do not have the precision for that. */
if (probability > 0.9999999999999999)
plimit = UINT64_C(18446744073709551615);
else
if (probability <= 0)
plimit = UINT64_C(0);
else
plimit = (uint64_t)(18446744073709551616.0 * probability);
/* Try allocating memory for the matrix and the label count array. */
if (size > 2147483646u || cluster_init(size, size)) {
fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
return EXIT_FAILURE;
}
/* Try allocating memory for the color lookup array. */
color = calloc(empty + 1, sizeof color[0]);
if (!color) {
fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
return EXIT_FAILURE;
}
fprintf(stderr, "Using Xorshift64* seed %" PRIu64 "\n", prng_randomize());
fflush(stderr);
/* Construct the matrix. */
cluster_fill(plimit);
{
size_t i;
color[empty] = 0xFFFFFFu;
/* Assign random colors. */
for (i = 0; i < empty; i++)
color[i] = prng_u64() >> 40;
}
printf("P6\n%lu %lu 255\n", size, size);
for (r = 0; r < rows; r++)
for (c = 0; c < cols; c++) {
const uint32_t rgb = color[label[ncols*r + c]];
fputc((rgb >> 16) & 255, stdout);
fputc((rgb >> 8) & 255, stdout);
fputc( rgb & 255, stdout);
}
fflush(stdout);
fprintf(stderr, "Done.\n");
return EXIT_SUCCESS;
}
程序以Gnuplot兼容的格式输出集群大小分布(直方图),从几个注释行开始,以#
开头,以指示用于计算结果的确切参数
输出中的第一列是集群大小(集群中的单元数),第二列是迭代期间发现的此类集群的精确计数,第三列是observ
#ifndef CLUSTER_H
#define CLUSTER_H
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
/*
This is in the public domain.
Written by Nominal Animal <question@nominal-animal.net>
*/
typedef uint32_t cluster_label;
typedef uint32_t cluster_size;
static size_t rows = 0; /* Number of mutable rows */
static size_t cols = 0; /* Number of mutable columns */
static size_t nrows = 0; /* Number of accessible rows == rows + 1 */
static size_t ncols = 0; /* Number of accessible columns == cols + 1 */
static size_t cells = 0; /* Total number of cells == nrows*ncols */
static size_t empty = 0; /* Highest-numbered label == nrows*ncols - 1 */
static size_t sizes = 0; /* Number of possible cluster sizes == rows*cols + 1 */
#define INDEX(row, col) (ncols*(row) + (col))
cluster_label *label = NULL; /* 2D contents: label[ncols*(row) + (col)] */
cluster_size *occurs = NULL; /* Number of occurrences of each label value */
/* Xorshift64* PRNG used for generating the matrix.
*/
static uint64_t prng_state = 1;
static inline uint64_t prng_u64(void)
{
uint64_t state = prng_state;
state ^= state >> 12;
state ^= state << 25;
state ^= state >> 27;
prng_state = state;
return state * UINT64_C(2685821657736338717);
}
static inline uint64_t prng_u64_less(void)
{
uint64_t result;
do {
result = prng_u64();
} while (result == UINT64_C(18446744073709551615));
return result;
}
static inline uint64_t prng_seed(const uint64_t seed)
{
if (seed)
prng_state = seed;
else
prng_state = 1;
return prng_state;
}
static inline uint64_t prng_randomize(void)
{
int discard = 1024;
uint64_t seed;
FILE *in;
/* By default, use time. */
prng_seed( ((uint64_t)time(NULL) * 2832631) ^
((uint64_t)clock() * 1120939) );
#ifdef __linux__
/* On Linux, read from /dev/urandom. */
in = fopen("/dev/urandom", "r");
if (in) {
if (fread(&seed, sizeof seed, 1, in) == 1)
prng_seed(seed);
fclose(in);
}
#endif
/* Churn the state a bit. */
seed = prng_state;
while (discard-->0) {
seed ^= seed >> 12;
seed ^= seed << 25;
seed ^= seed >> 27;
}
return prng_state = seed;
}
static inline void cluster_free(void)
{
free(occurs); occurs = NULL;
free(label); label = NULL;
rows = 0;
cols = 0;
nrows = 0;
ncols = 0;
cells = 0;
empty = 0;
sizes = 0;
}
static int cluster_init(const size_t want_cols, const size_t want_rows)
{
cluster_free();
if (want_cols < 1 || want_rows < 1)
return -1; /* Invalid number of rows or columns */
rows = want_rows;
cols = want_cols;
nrows = rows + 1;
ncols = cols + 1;
cells = nrows * ncols;
if ((size_t)(cells / nrows) != ncols ||
nrows <= rows || ncols <= cols)
return -1; /* Size overflows */
empty = nrows * ncols - 1;
sizes = rows * cols + 1;
label = calloc(cells, sizeof label[0]);
occurs = calloc(cells, sizeof occurs[0]);
if (!label || !occurs) {
cluster_free();
return -1; /* Not enough memory. */
}
return 0;
}
static void join2(size_t i1, size_t i2)
{
size_t root1 = i1, root2 = i2, root;
while (root1 != label[root1])
root1 = label[root1];
while (root2 != label[root2])
root2 = label[root2];
root = root1;
if (root > root2)
root = root2;
while (label[i1] != root) {
const size_t temp = label[i1];
label[i1] = root;
i1 = temp;
}
while (label[i2] != root) {
const size_t temp = label[i2];
label[i2] = root;
i2 = temp;
}
}
static void join3(size_t i1, size_t i2, size_t i3)
{
size_t root1 = i1, root2 = i2, root3 = i3, root;
while (root1 != label[root1])
root1 = label[root1];
while (root2 != label[root2])
root2 = label[root2];
while (root3 != label[root3])
root3 = label[root3];
root = root1;
if (root > root2)
root = root2;
if (root > root3)
root = root3;
while (label[i1] != root) {
const size_t temp = label[i1];
label[i1] = root;
i1 = temp;
}
while (label[i2] != root) {
const size_t temp = label[i2];
label[i2] = root;
i2 = temp;
}
while (label[i3] != root) {
const size_t temp = label[i3];
label[i3] = root;
i3 = temp;
}
}
static void join4(size_t i1, size_t i2, size_t i3, size_t i4)
{
size_t root1 = i1, root2 = i2, root3 = i3, root4 = i4, root;
while (root1 != label[root1])
root1 = label[root1];
while (root2 != label[root2])
root2 = label[root2];
while (root3 != label[root3])
root3 = label[root3];
while (root4 != label[root4])
root4 = label[root4];
root = root1;
if (root > root2)
root = root2;
if (root > root3)
root = root3;
if (root > root4)
root = root4;
while (label[i1] != root) {
const size_t temp = label[i1];
label[i1] = root;
i1 = temp;
}
while (label[i2] != root) {
const size_t temp = label[i2];
label[i2] = root;
i2 = temp;
}
while (label[i3] != root) {
const size_t temp = label[i3];
label[i3] = root;
i3 = temp;
}
while (label[i4] != root) {
const size_t temp = label[i4];
label[i4] = root;
i4 = temp;
}
}
static void cluster_fill(const uint64_t plimit)
{
size_t r, c;
/* Fill the matrix with the specified probability.
For efficiency, we use the same label for all
horizontally consecutive cells.
*/
for (r = 0; r < rows; r++) {
const size_t imax = ncols*r + cols;
cluster_label last;
size_t i = ncols*r;
last = (prng_u64_less() < plimit) ? i : empty;
label[i] = last;
while (++i < imax) {
if (prng_u64_less() < plimit) {
if (last == empty)
last = i;
} else
last = empty;
label[i] = last;
}
label[imax] = empty;
}
/* Fill the extra row with empty, too. */
{
cluster_label *ptr = label + ncols*rows;
cluster_label *const end = label + ncols*nrows;
while (ptr < end)
*(ptr++) = empty;
}
/* On the very first row, we need to join non-empty cells
vertically and diagonally down. */
for (c = 0; c < cols; c++)
switch ( ((label[c] < empty) << 0) |
((label[c+ncols] < empty) << 1) |
((label[c+ncols+1] < empty) << 2) ) {
/* <1> *
* 2 4 */
case 3: /* Join down */
join2(c, c+ncols); break;
case 5: /* Join down right */
join2(c, c+ncols+1); break;
case 7: /* Join down and down right */
join3(c, c+ncols, c+ncols+1); break;
}
/* On the other rows, we need to join non-empty cells
vertically, diagonally down, and diagonally up. */
for (r = 1; r < rows; r++) {
const size_t imax = ncols*r + cols;
size_t i;
for (i = ncols*r; i < imax; i++)
switch ( ((label[i] < empty) << 0) |
((label[i-ncols+1] < empty) << 1) |
((label[i+ncols] < empty) << 2) |
((label[i+ncols+1] < empty) << 3) ) {
/* 2 *
* <1> *
* 4 8 */
case 3: /* Diagonally up */
join2(i, i-ncols+1); break;
case 5: /* Down */
join2(i, i+ncols); break;
case 7: /* Down and diagonally up */
join3(i, i-ncols+1, i+ncols); break;
case 9: /* Diagonally down */
join2(i, i+ncols+1); break;
case 11: /* Diagonally up and diagonally down */
join3(i, i-ncols+1, i+ncols+1); break;
case 13: /* Down and diagonally down */
join3(i, i+ncols, i+ncols+1); break;
case 15: /* Down, diagonally down, and diagonally up */
join4(i, i-ncols+1, i+ncols, i+ncols+1); break;
}
}
/* Flatten the labels, so that all cells belonging to the
same cluster will have the same label. */
{
const size_t imax = ncols*rows;
size_t i;
for (i = 0; i < imax; i++)
if (label[i] < empty) {
size_t k = i, kroot = i;
while (kroot != label[kroot])
kroot = label[kroot];
while (label[k] != kroot) {
const size_t temp = label[k];
label[k] = kroot;
k = temp;
}
}
}
/* Count the number of occurrences of each label. */
memset(occurs, 0, (empty + 1) * sizeof occurs[0]);
{
cluster_label *const end = label + ncols*rows;
cluster_label *ptr = label;
while (ptr < end)
++occurs[*(ptr++)];
}
}
#endif /* CLUSTER_H */
#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include "cluster.h"
/*
This program is in the public domain.
Written by Nominal Animal <question@nominal-animal.net>
*/
int main(int argc, char *argv[])
{
uint64_t *count = NULL;
uint64_t plimit;
unsigned long size, iterations, i;
double probability;
char dummy;
if (argc != 4) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s SIZE ITERATIONS PROBABILITY\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "Where SIZE sets the matrix size (SIZE x SIZE),\n");
fprintf(stderr, " ITERATIONS is the number of iterations, and\n");
fprintf(stderr, " PROBABILITY is the probability [0, 1] of a cell\n");
fprintf(stderr, " being non-empty.\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) {
fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]);
return EXIT_FAILURE;
}
if (sscanf(argv[2], " %lu %c", &iterations, &dummy) != 1 || iterations < 1u) {
fprintf(stderr, "%s: Invalid number of iterations.\n", argv[2]);
return EXIT_FAILURE;
}
if (sscanf(argv[3], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) {
fprintf(stderr, "%s: Invalid probability.\n", argv[3]);
return EXIT_FAILURE;
}
/* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p),
but doubles do not have the precision for that. */
if (probability > 0.9999999999999999)
plimit = UINT64_C(18446744073709551615);
else
if (probability <= 0)
plimit = UINT64_C(0);
else
plimit = (uint64_t)(18446744073709551616.0 * probability);
/* Try allocating memory for the matrix and the label count array. */
if (size > 2147483646u || cluster_init(size, size)) {
fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
return EXIT_FAILURE;
}
/* Try allocating memory for the cluster size histogram. */
count = calloc(sizes + 1, sizeof count[0]);
if (!count) {
fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
return EXIT_FAILURE;
}
printf("# Xorshift64* seed is %" PRIu64 "\n", prng_randomize());
printf("# Matrix size is %lu x %lu\n", size, size);
printf("# Probability of a cell to be non-empty is %.6f%%\n",
100.0 * ((double)plimit / 18446744073709551615.0));
printf("# Collecting statistics over %lu iterations\n", iterations);
printf("# Size Count CountPerIteration\n");
fflush(stdout);
for (i = 0u; i < iterations; i++) {
size_t k = empty;
cluster_fill(plimit);
/* Add cluster sizes to the histogram. */
while (k-->0)
++count[occurs[k]];
}
/* Print the histogram of cluster sizes. */
{
size_t k = 1;
printf("%zu %" PRIu64 " %.3f\n", k, count[k], (double)count[k] / (double)iterations);
for (k = 2; k < sizes; k++)
if (count[k-1] != 0 || count[k] != 0 || count[k+1] != 0)
printf("%zu %" PRIu64 " %.3f\n", k, count[k], (double)count[k] / (double)iterations);
}
return EXIT_SUCCESS;
}
#include <stdlib.h>
#include <inttypes.h>
#include <stdio.h>
#include "cluster.h"
/*
This program is in the public domain.
Written by Nominal Animal <question@nominal-animal.net>
*/
int main(int argc, char *argv[])
{
uint64_t plimit;
uint32_t *color;
unsigned long size, r, c;
double probability;
char dummy;
if (argc != 3) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s SIZE PROBABILITY > matrix.ppm\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "Where SIZE sets the matrix size (SIZE x SIZE),\n");
fprintf(stderr, " PROBABILITY is the probability [0, 1] of a cell\n");
fprintf(stderr, " being non-empty.\n");
fprintf(stderr, "\n");
return EXIT_FAILURE;
}
if (sscanf(argv[1], " %lu %c", &size, &dummy) != 1 || size < 1u) {
fprintf(stderr, "%s: Invalid matrix size.\n", argv[1]);
return EXIT_FAILURE;
}
if (sscanf(argv[2], " %lf %c", &probability, &dummy) != 1 || probability < 0.0 || probability > 1.0) {
fprintf(stderr, "%s: Invalid probability.\n", argv[2]);
return EXIT_FAILURE;
}
/* Technically, we want plimit = (uint64_t)(0.5 + 18446744073709551615.0*p),
but doubles do not have the precision for that. */
if (probability > 0.9999999999999999)
plimit = UINT64_C(18446744073709551615);
else
if (probability <= 0)
plimit = UINT64_C(0);
else
plimit = (uint64_t)(18446744073709551616.0 * probability);
/* Try allocating memory for the matrix and the label count array. */
if (size > 2147483646u || cluster_init(size, size)) {
fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
return EXIT_FAILURE;
}
/* Try allocating memory for the color lookup array. */
color = calloc(empty + 1, sizeof color[0]);
if (!color) {
fprintf(stderr, "%s: Matrix size is too large.\n", argv[1]);
return EXIT_FAILURE;
}
fprintf(stderr, "Using Xorshift64* seed %" PRIu64 "\n", prng_randomize());
fflush(stderr);
/* Construct the matrix. */
cluster_fill(plimit);
{
size_t i;
color[empty] = 0xFFFFFFu;
/* Assign random colors. */
for (i = 0; i < empty; i++)
color[i] = prng_u64() >> 40;
}
printf("P6\n%lu %lu 255\n", size, size);
for (r = 0; r < rows; r++)
for (c = 0; c < cols; c++) {
const uint32_t rgb = color[label[ncols*r + c]];
fputc((rgb >> 16) & 255, stdout);
fputc((rgb >> 8) & 255, stdout);
fputc( rgb & 255, stdout);
}
fflush(stdout);
fprintf(stderr, "Done.\n");
return EXIT_SUCCESS;
}
#ifndef CLUSTERS_H
#define CLUSTERS_H
#include <stdlib.h>
#include <inttypes.h>
#include <limits.h>
#include <time.h>
/* This file is in public domain. No guarantees, no warranties.
Written by Nominal Animal <question@nominal-animal.net>.
*/
/* For pure C89 compilers, use '-DSTATIC_INLINE=static' at compile time. */
#ifndef STATIC_INLINE
#define STATIC_INLINE static inline
#endif
#define ERR_INVALID -1 /* Invalid function parameter */
#define ERR_TOOLARGE -2 /* Matrix size is too large */
#define ERR_NOMEM -3 /* Out of memory */
typedef unsigned char cluster_color;
typedef uint32_t cluster_label;
typedef uint64_t cluster_count;
#define CLUSTER_WHITE 0
#define CLUSTER_BLACK 1
#define CLUSTER_NONE UCHAR_MAX /* Reserved */
#define FMT_COLOR "u"
#define FMT_LABEL PRIu32
#define FMT_COUNT PRIu64
typedef struct {
uint64_t state;
} prng;
typedef struct {
/* Pseudo-random number generator used */
prng rng;
/* Actual size of the matrix */
cluster_label rows;
cluster_label cols;
/* Number of matrices the histograms have been collected from */
cluster_count iterations;
/* Probability of each cell being black */
uint64_t p_black;
/* Probability of diagonal connections */
uint64_t d_black;
uint64_t d_white;
/* Cluster colormap contains (rows+2) rows and (cols+1) columns */
cluster_color *map;
/* Disjoint set of (rows) rows and (cols) columns */
cluster_label *djs;
/* Number of occurrences per disjoint set root */
cluster_label *white_roots;
cluster_label *black_roots;
/* Histograms of white and black clusters */
cluster_count *white_histogram;
cluster_count *black_histogram;
} cluster;
#define CLUSTER_INITIALIZER { {0}, 0, 0, 0, 0.0, 0.0, 0.0, NULL, NULL, NULL, NULL, NULL, NULL }
/* Calculate uint64_t limit corresponding to probability p. */
STATIC_INLINE uint64_t probability_limit(const double p)
{
if (p <= 0.0)
return UINT64_C(0);
else
if (p <= 0.5)
return (uint64_t)(p * 18446744073709551615.0);
else
if (p >= 1.0)
return UINT64_C(18446744073709551615);
else
return UINT64_C(18446744073709551615) - (uint64_t)((double)(1.0 - p) * 18446744073709551615.0);
}
/* Return true at probability corresponding to limit 'limit'.
This implements a Xorshift64* pseudo-random number generator. */
STATIC_INLINE int probability(prng *const rng, const uint64_t limit)
{
uint64_t state = rng->state;
uint64_t value;
/* To correctly cover the entire range, we ensure we never generate a zero. */
do {
state ^= state >> 12;
state ^= state << 25;
state ^= state >> 27;
value = state * UINT64_C(2685821657736338717);
} while (!value);
rng->state = state;
return (value <= limit) ? CLUSTER_BLACK : CLUSTER_WHITE;
}
/* Generate a random seed for the Xorshift64* pseudo-random number generator. */
static uint64_t randomize(prng *const rng)
{
unsigned int rounds = 127;
uint64_t state = UINT64_C(3069887672279) * (uint64_t)time(NULL)
^ UINT64_C(60498839) * (uint64_t)clock();
if (!state)
state = 1;
/* Churn the state a bit. */
while (rounds-->0) {
state ^= state >> 12;
state ^= state << 25;
state ^= state >> 27;
}
if (rng)
rng->state = state;
return state;
}
/* Free all resources related to a cluster. */
STATIC_INLINE void free_cluster(cluster *c)
{
if (c) {
/* Free dynamically allocated pointers. Note: free(NULL) is safe. */
free(c->map);
free(c->djs);
free(c->white_roots);
free(c->black_roots);
free(c->white_histogram);
free(c->black_histogram);
c->rng.state = 0;
c->rows = 0;
c->cols = 0;
c->iterations = 0;
c->p_black = 0;
c->d_white = 0;
c->d_black = 0;
c->map = NULL;
c->djs = NULL;
c->white_roots = 0;
c->black_roots = 0;
c->white_histogram = NULL;
c->black_histogram = NULL;
}
}
/* Initialize cluster structure, for a matrix of specified size. */
static int init_cluster(cluster *c, const int rows, const int cols,
const double p_black,
const double d_white, const double d_black)
{
const cluster_label label_cols = cols;
const cluster_label label_rows = rows;
const cluster_label color_rows = rows + 2;
const cluster_label color_cols = cols + 1;
const cluster_label color_cells = color_rows * color_cols;
const cluster_label label_cells = label_rows * label_cols;
const cluster_label labels = label_cells + 2; /* One extra! */
if (!c)
return ERR_INVALID;
c->rng.state = 0; /* Invalid seed for Xorshift64*. */
c->rows = 0;
c->cols = 0;
c->iterations = 0;
c->p_black = 0;
c->d_white = 0;
c->d_black = 0;
c->map = NULL;
c->djs = NULL;
c->white_roots = NULL;
c->black_roots = NULL;
c->white_histogram = NULL;
c->black_histogram = NULL;
if (rows < 1 || cols < 1)
return ERR_INVALID;
if ((unsigned int)color_rows <= (unsigned int)rows ||
(unsigned int)color_cols <= (unsigned int)cols ||
(cluster_label)(color_cells / color_rows) != color_cols ||
(cluster_label)(color_cells / color_cols) != color_rows ||
(cluster_label)(label_cells / label_rows) != label_cols ||
(cluster_label)(label_cells / label_cols) != label_rows)
return ERR_TOOLARGE;
c->black_histogram = calloc(labels, sizeof (cluster_count));
c->white_histogram = calloc(labels, sizeof (cluster_count));
c->black_roots = calloc(labels, sizeof (cluster_label));
c->white_roots = calloc(labels, sizeof (cluster_label));
c->djs = calloc(label_cells, sizeof (cluster_label));
c->map = calloc(color_cells, sizeof (cluster_color));
if (!c->map || !c->djs ||
!c->white_roots || !c->black_roots ||
!c->white_histogram || !c->black_histogram) {
free(c->map);
free(c->djs);
free(c->white_roots);
free(c->black_roots);
free(c->white_histogram);
free(c->black_histogram);
return ERR_NOMEM;
}
c->rows = rows;
c->cols = cols;
c->p_black = probability_limit(p_black);
c->d_white = probability_limit(d_white);
c->d_black = probability_limit(d_black);
/* Initialize the color map to NONE. */
{
cluster_color *ptr = c->map;
cluster_color *const end = c->map + color_cells;
while (ptr < end)
*(ptr++) = CLUSTER_NONE;
}
/* calloc() initialized the other arrays to zeros already. */
return 0;
}
/* Disjoint set: find root. */
STATIC_INLINE cluster_label djs_root(const cluster_label *const djs, cluster_label from)
{
while (from != djs[from])
from = djs[from];
return from;
}
/* Disjoint set: path compression. */
STATIC_INLINE void djs_path(cluster_label *const djs, cluster_label from, const cluster_label root)
{
while (from != root) {
const cluster_label temp = djs[from];
djs[from] = root;
from = temp;
}
}
/* Disjoint set: Flatten. Returns the root, and flattens the path to it. */
STATIC_INLINE cluster_label djs_flatten(cluster_label *const djs, cluster_label from)
{
const cluster_label root = djs_root(djs, from);
djs_path(djs, from, root);
return root;
}
/* Disjoint set: Join two subsets. */
STATIC_INLINE void djs_join2(cluster_label *const djs,
cluster_label from1, cluster_label from2)
{
cluster_label root, temp;
root = djs_root(djs, from1);
temp = djs_root(djs, from2);
if (root > temp)
temp = root;
djs_path(djs, from1, root);
djs_path(djs, from2, root);
}
/* Disjoint set: Join three subsets. */
STATIC_INLINE void djs_join3(cluster_label *const djs,
cluster_label from1, cluster_label from2,
cluster_label from3)
{
cluster_label root, temp;
root = djs_root(djs, from1);
temp = djs_root(djs, from2);
if (root > temp)
root = temp;
temp = djs_root(djs, from3);
if (root > temp)
root = temp;
djs_path(djs, from1, root);
djs_path(djs, from2, root);
djs_path(djs, from3, root);
}
/* Disjoint set: Join four subsets. */
STATIC_INLINE void djs_join4(cluster_label *const djs,
cluster_label from1, cluster_label from2,
cluster_label from3, cluster_label from4)
{
cluster_label root, temp;
root = djs_root(djs, from1);
temp = djs_root(djs, from2);
if (root > temp)
root = temp;
temp = djs_root(djs, from3);
if (root > temp)
root = temp;
temp = djs_root(djs, from4);
if (root > temp)
root = temp;
djs_path(djs, from1, root);
djs_path(djs, from2, root);
djs_path(djs, from3, root);
djs_path(djs, from4, root);
}
/* Disjoint set: Join five subsets. */
STATIC_INLINE void djs_join5(cluster_label *const djs,
cluster_label from1, cluster_label from2,
cluster_label from3, cluster_label from4,
cluster_label from5)
{
cluster_label root, temp;
root = djs_root(djs, from1);
temp = djs_root(djs, from2);
if (root > temp)
root = temp;
temp = djs_root(djs, from3);
if (root > temp)
root = temp;
temp = djs_root(djs, from4);
if (root > temp)
root = temp;
temp = djs_root(djs, from5);
if (root > temp)
root = temp;
djs_path(djs, from1, root);
djs_path(djs, from2, root);
djs_path(djs, from3, root);
djs_path(djs, from4, root);
djs_path(djs, from5, root);
}
static void iterate(cluster *const cl)
{
prng *const rng = &(cl->rng);
uint64_t const p_black = cl->p_black;
uint64_t d_color[2];
cluster_color *const map = cl->map + cl->cols + 2;
cluster_label const map_stride = cl->cols + 1;
cluster_label *const djs = cl->djs;
cluster_label *roots[2];
cluster_label const rows = cl->rows;
cluster_label const cols = cl->cols;
int r, c;
d_color[CLUSTER_WHITE] = cl->d_white;
d_color[CLUSTER_BLACK] = cl->d_black;
roots[CLUSTER_WHITE] = cl->white_roots;
roots[CLUSTER_BLACK] = cl->black_roots;
for (r = 0; r < rows; r++) {
cluster_label const curr_i = r * cols;
cluster_color *const curr_row = map + r * map_stride;
cluster_color *const prev_row = curr_row - map_stride;
for (c = 0; c < cols; c++) {
cluster_color color = probability(rng, p_black);
cluster_label label = curr_i + c;
uint64_t diag = d_color[color];
unsigned int joins = 0;
/* Assign the label and color of the current cell, */
djs[label] = label;
curr_row[c] = color;
/* Because we join left, up-left, up, and up-right, and
all those have been assigned to already, we can do
the necessary joins right now. */
/* Join left? */
joins |= (curr_row[c-1] == color) << 0;
/* Join up? */
joins |= (prev_row[c ] == color) << 1;
/* Join up left? */
joins |= (prev_row[c-1] == color && probability(rng, diag)) << 2;
/* Join up right? */
joins |= (prev_row[c+1] == color && probability(rng, diag)) << 3;
/* Do the corresponding joins. */
switch (joins) {
case 1: /* Left */
djs_join2(djs, label, label - 1);
break;
case 2: /* Up */
djs_join2(djs, label, label - cols);
break;
case 3: /* Left and up */
djs_join3(djs, label, label - 1, label - cols);
break;
case 4: /* Up-left */
djs_join2(djs, label, label - cols - 1);
break;
case 5: /* Left and up-left */
djs_join3(djs, label, label - 1, label - cols - 1);
break;
case 6: /* Up and up-left */
djs_join3(djs, label, label - cols, label - cols - 1);
break;
case 7: /* Left, up, and up-left */
djs_join4(djs, label, label - 1, label - cols, label - cols - 1);
break;
case 8: /* Up-right */
djs_join2(djs, label, label - cols + 1);
break;
case 9: /* Left and up-right */
djs_join3(djs, label, label - 1, label - cols + 1);
break;
case 10: /* Up and up-right */
djs_join3(djs, label, label - cols, label - cols + 1);
break;
case 11: /* Left, up, and up-right */
djs_join4(djs, label, label - 1, label - cols, label - cols + 1);
break;
case 12: /* Up-left and up-right */
djs_join3(djs, label, label - cols - 1, label - cols + 1);
break;
case 13: /* Left, up-left, and up-right */
djs_join4(djs, label, label - 1, label - cols - 1, label - cols + 1);
break;
case 14: /* Up, up-left, and up-right */
djs_join4(djs, label, label - cols, label - cols - 1, label - cols + 1);
break;
case 15: /* Left, up, up-left, and up-right */
djs_join5(djs, label, label - 1, label - cols, label - cols - 1, label - cols + 1);
break;
}
}
}
/* Count the occurrences of each disjoint-set root label. */
if (roots[0] && roots[1]) {
const size_t labels = rows * cols + 2;
/* Clear the counts. */
memset(roots[0], 0, labels * sizeof (cluster_label));
memset(roots[1], 0, labels * sizeof (cluster_label));
for (r = 0; r < rows; r++) {
const cluster_color *const curr_row = map + r * map_stride;
const cluster_label curr_i = r * cols;
for (c = 0; c < cols; c++)
roots[curr_row[c]][djs_flatten(djs, curr_i + c)]++;
}
} else {
size_t i = rows * cols;
while (i-->0)
djs_flatten(djs, i);
}
/* Collect the statistics. */
if (cl->white_histogram && roots[0]) {
const cluster_label *const root_count = roots[0];
cluster_count *const histogram = cl->white_histogram;
size_t i = rows * cols + 1;
while (i-->0)
histogram[root_count[i]]++;
}
if (cl->black_histogram && roots[1]) {
const cluster_label *const root_count = roots[1];
cluster_count *const histogram = cl->black_histogram;
size_t i = rows * cols + 1;
while (i-->0)
histogram[root_count[i]]++;
}
/* Note: index zero and (rows*cols+1) are zero in the histogram, for ease of scanning. */
if (cl->white_histogram || cl->black_histogram) {
const size_t n = rows * cols + 1;
if (cl->white_histogram) {
cl->white_histogram[0] = 0;
cl->white_histogram[n] = 0;
}
if (cl->black_histogram) {
cl->black_histogram[0] = 0;
cl->black_histogram[n] = 0;
}
}
/* One more iteration performed. */
cl->iterations++;
}
#endif /* CLUSTERS_H */
#include <stdlib.h>
#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include "clusters.h"
/* This file is in public domain. No guarantees, no warranties.
Written by Nominal Animal <question@nominal-animal.net>.
*/
#define DEFAULT_ROWS 100
#define DEFAULT_COLS 100
#define DEFAULT_P_BLACK 0.0
#define DEFAULT_D_WHITE 0.0
#define DEFAULT_D_BLACK 0.0
#define DEFAULT_ITERS 1
int usage(const char *argv0)
{
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv0);
fprintf(stderr, " %s OPTIONS [ > output.txt ]\n", argv0);
fprintf(stderr, "\n");
fprintf(stderr, "Options:\n");
fprintf(stderr, " rows=SIZE Set number of rows. Default is %d.\n", DEFAULT_ROWS);
fprintf(stderr, " cols=SIZE Set number of columns. Default is %d.\n", DEFAULT_ROWS);
fprintf(stderr, " L=SIZE Set rows=SIZE and cols=SIZE.\n");
fprintf(stderr, " black=P Set the probability of a cell to be black. Default is %g.\n", DEFAULT_P_BLACK);
fprintf(stderr, " All non-black cells are white.\n");
fprintf(stderr, " dwhite=P Set the probability of white cells connecting diagonally.\n");
fprintf(stderr, " Default is %g.\n", DEFAULT_D_WHITE);
fprintf(stderr, " dblack=P Set the probability of black cells connecting diagonally.\n");
fprintf(stderr, " Default is %g.\n", DEFAULT_D_BLACK);
fprintf(stderr, " N=COUNT Number of iterations for gathering statistics. Default is %d.\n", DEFAULT_ITERS);
fprintf(stderr, " seed=U64 Set the Xorshift64* pseudorandom number generator seed; nonzero.\n");
fprintf(stderr, " Default is to pick one randomly (based on time).\n");
fprintf(stderr, "\n");
fprintf(stderr, "The output consists of comment lines and data lines.\n");
fprintf(stderr, "Comment lines begin with a #:\n");
fprintf(stderr, " # This is a comment line.\n");
fprintf(stderr, "Each data line contains a cluster size, the number of white clusters of that size\n");
fprintf(stderr, "observed during iterations, the number of black clusters of that size observed\n");
fprintf(stderr, "during iterations, and the number of any clusters of that size observed:\n");
fprintf(stderr, " SIZE WHITE_CLUSTERS BLACK_CLUSTERS TOTAL_CLUSTERS\n");
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
int main(int argc, char *argv[])
{
int rows = DEFAULT_ROWS;
int cols = DEFAULT_COLS;
double p_black = DEFAULT_P_BLACK;
double d_white = DEFAULT_D_WHITE;
double d_black = DEFAULT_D_BLACK;
long iters = DEFAULT_ITERS;
uint64_t seed = 0;
cluster c = CLUSTER_INITIALIZER;
int arg, itemp;
uint64_t u64temp;
double dtemp;
long ltemp;
char dummy;
size_t n;
size_t i;
if (argc < 2)
return usage(argv[0]);
for (arg = 1; arg < argc; arg++)
if (!strcmp(argv[arg], "-h") || !strcmp(argv[arg], "/?") || !strcmp(argv[arg], "--help"))
return usage(argv[0]);
else
if (sscanf(argv[arg], "L=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "l=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "size=%d %c", &itemp, &dummy) == 1) {
rows = itemp;
cols = itemp;
} else
if (sscanf(argv[arg], "seed=%" SCNu64 " %c", &u64temp, &dummy) == 1 ||
sscanf(argv[arg], "seed=%" SCNx64 " %c", &u64temp, &dummy) == 1 ||
sscanf(argv[arg], "s=%" SCNu64 " %c", &u64temp, &dummy) == 1 ||
sscanf(argv[arg], "s=%" SCNx64 " %c", &u64temp, &dummy) == 1) {
seed = u64temp;
} else
if (sscanf(argv[arg], "N=%ld %c", <emp, &dummy) == 1 ||
sscanf(argv[arg], "n=%ld %c", <emp, &dummy) == 1 ||
sscanf(argv[arg], "count=%ld %c", <emp, &dummy) == 1) {
iters = ltemp;
} else
if (sscanf(argv[arg], "rows=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "r=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "height=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "h=%d %c", &itemp, &dummy) == 1) {
rows = itemp;
} else
if (sscanf(argv[arg], "columns=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "cols=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "c=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "width=%d %c", &itemp, &dummy) == 1 ||
sscanf(argv[arg], "w=%d %c", &itemp, &dummy) == 1) {
cols = itemp;
} else
if (sscanf(argv[arg], "black=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "p0=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "b=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "P=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "p0=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "p=%lf %c", &dtemp, &dummy) == 1) {
p_black = dtemp;
} else
if (sscanf(argv[arg], "white=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "p1=%lf %c", &dtemp, &dummy) == 1) {
p_black = 1.0 - dtemp;
} else
if (sscanf(argv[arg], "dwhite=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "dw=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "d0=%lf %c", &dtemp, &dummy) == 1) {
d_white = dtemp;
} else
if (sscanf(argv[arg], "dblack=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "db=%lf %c", &dtemp, &dummy) == 1 ||
sscanf(argv[arg], "d1=%lf %c", &dtemp, &dummy) == 1) {
d_black = dtemp;
} else {
fprintf(stderr, "%s: Unknown option.\n", argv[arg]);
return EXIT_FAILURE;
}
switch (init_cluster(&c, rows, cols, p_black, d_white, d_black)) {
case 0: break; /* OK */
case ERR_INVALID:
fprintf(stderr, "Invalid size.\n");
return EXIT_FAILURE;
case ERR_TOOLARGE:
fprintf(stderr, "Size is too large.\n");
return EXIT_FAILURE;
case ERR_NOMEM:
fprintf(stderr, "Not enough memory.\n");
return EXIT_FAILURE;
}
if (!seed)
seed = randomize(NULL);
c.rng.state = seed;
/* The largest possible cluster has n cells. */
n = (size_t)rows * (size_t)cols;
/* Print the comments describing the initial parameters. */
printf("# seed: %" PRIu64 " (Xorshift 64*)\n", seed);
printf("# size: %d rows, %d columns\n", rows, cols);
printf("# P(black): %.6f (%" PRIu64 "/18446744073709551615)\n", p_black, c.p_black);
printf("# P(black connected diagonally): %.6f (%" PRIu64 "/18446744073709551615)\n", d_black, c.d_black);
printf("# P(white connected diagonally): %.6f (%" PRIu64 "/18446744073709551615)\n", d_white, c.d_white);
fflush(stdout);
while (iters-->0)
iterate(&c);
printf("# Iterations: %" PRIu64 "\n", c.iterations);
printf("#\n");
printf("# size white_clusters(size) black_clusters(size) clusters(size)\n");
/* Note: c._histogram[0] == c._histogram[n] == 0, for ease of scanning. */
for (i = 1; i <= n; i++)
if (c.white_histogram[i-1] || c.white_histogram[i] || c.white_histogram[i+1] ||
c.black_histogram[i-1] || c.black_histogram[i] || c.black_histogram[i+1])
printf("%lu %" FMT_COUNT " %" FMT_COUNT " %" FMT_COUNT "\n",
(unsigned long)i,
c.white_histogram[i],
c.black_histogram[i],
c.white_histogram[i]+c.black_histogram[i]);
/* Since we are exiting anyway, this is not really necessary. */
free_cluster(&c);
/* All done. */
return EXIT_SUCCESS;
}