Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
153 views
in Technique[技术] by (71.8m points)

Implementing 8-Connectivity Hoshen-Kopelman Algorithm in C

I found here an implementation for Hoshen-Kopelman Algorithm, But it checks neighbors only up and left, meaning that a diagonal connection is not considered a connection.

How can I improve this code so that even a diagonal connection will be considered a connection?

In the following example I expect 1 object and not 7 objects:

4 5
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
 --input--
  1   0   1   0   1
  0   1   0   1   0
  1   0   1   0   0
  0   0   1   0   0
 --output--
  1   0   2   0   3
  0   4   0   5   0
  6   0   7   0   0
  0   0   7   0   0
HK reports 7 clusters found

This is the implementation (full code can be found here):

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* Implementation of Union-Find Algorithm */


/* The 'labels' array has the meaning that labels[x] is an alias for the label x; by
   following this chain until x == labels[x], you can find the canonical name of an
   equivalence class.  The labels start at one; labels[0] is a special value indicating
   the highest label already used. */

int* labels;
int  n_labels = 0;     /* length of the labels array */

/*  uf_find returns the canonical label for the equivalence class containing x */

int uf_find(int x)
{
    int y = x;
    while (labels[y] != y)
        y = labels[y];

    while (labels[x] != x)
    {
        int z = labels[x];
        labels[x] = y;
        x = z;
    }
    return y;
}

/*  uf_union joins two equivalence classes and returns the canonical label of the resulting class. */

int uf_union(int x, int y)
{
    return labels[uf_find(x)] = uf_find(y);
}

/*  uf_make_set creates a new equivalence class and returns its label */

int uf_make_set(void)
{
    labels[0] ++;
    assert(labels[0] < n_labels);
    labels[labels[0]] = labels[0];
    return labels[0];
}

/*  uf_intitialize sets up the data structures needed by the union-find implementation. */

void uf_initialize(int max_labels)
{
    n_labels = max_labels;
    labels = calloc(sizeof(int), n_labels);
    labels[0] = 0;
}

/*  uf_done frees the memory used by the union-find data structures */

void uf_done(void)
{
    n_labels = 0;
    free(labels);
    labels = 0;
}

/* End Union-Find implementation */

#define max(a,b) (a>b?a:b)
#define min(a,b) (a>b?b:a)

/* print_matrix prints out a matrix that is set up in the "pointer to pointers" scheme
   (aka, an array of arrays); this is incompatible with C's usual representation of 2D
   arrays, but allows for 2D arrays with dimensions determined at run-time */

void print_matrix(int** matrix, int m, int n)
{
    for (int i = 0; i < m; i++)
    {
        for (int j = 0; j < n; j++)
            printf("%3d ", matrix[i][j]);
        printf("
");
    }
}


/* Label the clusters in "matrix".  Return the total number of clusters found. */

int hoshen_kopelman(int** matrix, int m, int n)
{

    uf_initialize(m * n / 2);

    /* scan the matrix */

    for (int y = 0; y < m; y++)
    {
        for (int x = 0; x < n; x++)
        {
            if (matrix[y][x])
            {                        // if occupied ...

                int up = (y == 0 ? 0 : matrix[y - 1][x]);    //  look up  
                int left = (x == 0 ? 0 : matrix[y][x - 1]);  //  look left

                switch (!!up + !!left)
                {

                case 0:
                    matrix[y][x] = uf_make_set();      // a new cluster
                    break;

                case 1:                              // part of an existing cluster
                    matrix[y][x] = max(up, left);    // whichever is nonzero is labelled
                    break;

                case 2:                              // this site binds two clusters
                    matrix[y][x] = uf_union(up, left);
                    break;
                }

            }
        }
    }


    /* apply the relabeling to the matrix */

    /* This is a little bit sneaky.. we create a mapping from the canonical labels
       determined by union/find into a new set of canonical labels, which are
       guaranteed to be sequential. */

    int* new_labels = calloc(sizeof(int), n_labels); // allocate array, initialized to zero

    for (int i = 0; i < m; i++)
        for (int j = 0; j < n; j++)
            if (matrix[i][j])
            {
                int x = uf_find(matrix[i][j]);
                if (new_labels[x] == 0)
                {
                    new_labels[0]++;
                    new_labels[x] = new_labels[0];
                }
                matrix[i][j] = new_labels[x];
            }

    int total_clusters = new_labels[0];

    free(new_labels);
    uf_done();

    return total_clusters;
}

/* This procedure checks to see that any occupied neighbors of an occupied site
   have the same label. */

void check_labelling(int** matrix, int m, int n)
{
    int N, S, E, W;
    for (int i = 0; i < m; i++)
        for (int j = 0; j < n; j++)
            if (matrix[i][j])
            {
                N = (i == 0 ? 0 : matrix[i - 1][j]);
                S = (i == m - 1 ? 0 : matrix[i + 1][j]);
                E = (j == n - 1 ? 0 : matrix[i][j + 1]);
                W = (j == 0 ? 0 : matrix[i][j - 1]);

                assert(N == 0 || matrix[i][j] == N);
                assert(S == 0 || matrix[i][j] == S);
                assert(E == 0 || matrix[i][j] == E);
                assert(W == 0 || matrix[i][j] == W);
            }
}

/* The sample program reads in a matrix from standard input, runs the HK algorithm on
   it, and prints out the results.  The form of the input is two integers giving the
   dimensions of the matrix, followed by the matrix elements (with data separated by
   whitespace).

a sample input file is the following:

8 8
1 1 1 1 1 1 1 1
0 0 0 0 0 0 0 1
1 0 0 0 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 0 1 0 1
1 0 0 1 1 1 0 1
1 1 1 1 0 0 0 1
0 0 0 1 1 1 0 1

this sample input gives the following output:

 --input--
  1   1   1   1   1   1   1   1
  0   0   0   0   0   0   0   1
  1   0   0   0   0   1   0   1
  1   0   0   1   0   1   0   1
  1   0   0   1   0   1   0   1
  1   0   0   1   1   1   0   1
  1   1   1   1   0   0   0   1
  0   0   0   1   1   1   0   1
 --output--
  1   1   1   1   1   1   1   1
  0   0   0   0   0   0   0   1
  2   0   0   0   0   2   0   1
  2   0   0   2   0   2   0   1
  2   0   0   2   0   2   0   1
  2   0   0   2   2   2   0   1
  2   2   2   2   0   0   0   1
  0   0   0   2   2   2   0   1
HK reports 2 clusters found

*/

int main(int argc, char** argv)
{

    int m, n;
    int** matrix;

    /* Read in the matrix from standard input

       The whitespace-deliminated matrix input is preceeded
       by the number of rows and number of columns */

    while (2 == scanf_s("%d %d", &m, &n))
    {  // m = rows, n = columns

        matrix = (int**)calloc(m, sizeof(int*));

        for (int i = 0; i < m; i++)
        {
            matrix[i] = (int*)calloc(n, sizeof(int));
            for (int j = 0; j < n; j++)
                scanf_s("%d", &(matrix[i][j]));
        }

        printf_s(" --input-- 
");

        print_matrix(matrix, m, n);

        printf(" --output-- 
");

        /* Process the matrix */

        int clusters = hoshen_kopelman(matrix, m, n);

        /* Output the result */

        print_matrix(matrix, m, n);

        check_labelling(matrix, m, n);

        printf("HK reports %d clusters found
", clusters);

        for (int i = 0; i < m; i++)
            free(matrix[i]);
        free(matrix);
    }

    return 0;
}

I tried to change the function hoshen_kopelman as described below, but I still get 2 objects instead of 1:

int hoshen_kopelman(int** matrix, int m, int n)
{

    uf_initialize(m * n / 2);

    /* scan the matrix */

    for (int y = 0; y < m; y++)
    {
        for (int x = 0; x < n; x++)
        {
            if (matrix[y][x])
            {                        // if occupied ...

                int up = (y == 0 ? 0 : matrix[y - 1][x]);    //  look up  
                int left = (x == 0 ? 0 : matrix[y][x - 1]);  //  look left
                
                // ----------- THE NEW CODE -------------
                if (x > 0) 
                {
                    if (up == 0 && y > 0) // left+up
                        up = matrix[y - 1][x - 1];

                    if (left == 0 && y < m - 1) // left+down
                        left = matrix[y + 1][x - 1];
                }
                // ---------- END NEW CODE --------------

                switch (!!up + !!left)
                {

                case 0:
                    matrix[y][x] = uf_make_set();      // a new cluster
                    break;

                case 1:                              // part of an existing cluster
                    matrix[y][x] = max(up, left);    // whichever is nonzero is labelled
                    break;

                case 2:                              // this site binds two clusters
                    matrix[y][x] = uf_union(up, left);
                    break;
                }

            }
        }
    }


    /* apply the relabeling to the matrix */

    /* This is a little bit sneaky.. we create a mapping from the canonical labels
       determined by union/find into a new set of canonical labels, which are
       guaranteed to be sequential. */

    int* new_labels = calloc(sizeof(int), n_labels); // allocate array, initialized to zero

    for (int i = 0; i < m; i++)
        for (int j = 0; j < n; j++)
            if (matrix[i][j])
            {
                int x = uf_find(matrix[i][j]);
                if (new_labels[x] == 0)
                {
                    new_labels[0]++;
                    new_labels[x] = new_labels[0];
                }
                matrix[i][j] = new_labels[x];
            }

    int total_clusters = new_labels[0];

    free(new_labels);
    uf_done();

    return total_clusters;
}

The following output is now obtained (I am expecting 1 and got 2):

4 5
1 0 1 0 1
0 1 0 1 0
1 0 1 0 0
0 0 1 0 0
 --input--
  1   0   1   0   1
  0   1   0   1   0
  1   0   1   0   0
  0   0   1   0   0
 --output--
  1   0   1   0   1
  0   1   0   1   0
  2   0   1   0   0
  0   0   1   0   0
HK reports 2 clusters found

What is the correct way to correct the code to check all 8 neighbors?


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

I led you astray saying to check down-left. The algorithm relies on the current node it is examining being after all the neighbors it checks. So you need to check left, up, up-left, and up-right. You can use this in place of your new code:

                if (y > 0) 
                {
                    if (left == 0 && x > 0) // left+up
                        left = matrix[y - 1][x - 1];

                    if (up == 0 && x < n-1) // right+up
                        up = matrix[y - 1][x + 1];
                }

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...