
Program life_g.c

This program is somewhat more complicated than the previous example, but it is still quite simple in its use of graphics. There is one new element here, related to our use of MPI_Sendrecv in section 5.2.5: the exchange of boundary data is done with non-blocking sends and receives instead.

The computation itself is quite similar to Jacobi iterations. The difference is that here the only values allowed in the matrix are zero and one. The iteration takes a sum over all neighbours of an (i,j) element of the matrix, this time including the diagonal neighbours (this is the first difference), so we have 8 neighbours instead of just 4, as was the case in the diffusion problem. The new value of the element then depends on this sum: if the sum is less than 2 or greater than 3, the element is set to zero (the cell dies of loneliness or of overcrowding); if the sum is exactly 3, the element is set to one (a new cell is born); otherwise the element retains its previous value. These are the rules of Conway's Game of Life.

The result is a peculiar pattern that keeps evolving within the matrix. The program calculates the evolution and displays the pattern as it evolves.

Here is the code.

#include <mpi.h>
#define MPE_GRAPHICS
#include "mpe.h"
#include <stdio.h>
#include <stdlib.h>   /* srand48(), drand48(), malloc() */
#include <string.h>   /* strlen(), strcpy(), strcmp() */

static MPE_XGraph graph;

static char *displayname = 0;
static int width = 400, height = 400;

#define BORN 1
#define DIES 0

/* The Life function */
double life(int matrix_size, int ntimes, MPI_Comm comm)
{
  int      rank, size ;
  int      next, prev ;
  int      i, j, k;
  int      mysize, sum ;
  int    **matrix, **temp, **addr ;
  double   slavetime, totaltime, starttime ;
  int      my_offset;

  /* Determine size and my rank in communicator */
  MPI_Comm_size(comm, &size) ;
  MPI_Comm_rank(comm, &rank) ;

  /* Set neighbors */
  if (rank == 0) 
    prev = MPI_PROC_NULL;
  else
    prev = rank-1;
  if (rank == size - 1)
    next = MPI_PROC_NULL;
  else
    next = rank+1;

  /* Determine my part of the matrix */
  mysize = matrix_size/size + ((rank < (matrix_size % size)) ? 1 : 0 ) ;
  my_offset = rank * (matrix_size/size);
  if (rank > (matrix_size % size)) my_offset += (matrix_size % size);
  else                             my_offset += rank;

  /* allocate the memory dynamically for the matrix */
  matrix = (int **)malloc(sizeof(int *)*(mysize+2)) ;
  temp = (int **)malloc(sizeof(int *)*(mysize+2)) ;
  for (i = 0; i < mysize+2; i++) {
    matrix[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
    temp[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
  }

  /* Initialize the boundaries of the life matrix */
  for (j = 0; j < matrix_size+2; j++)
    matrix[0][j] = matrix[mysize+1][j] = temp[0][j] = temp[mysize+1][j] = DIES ;
  for (i = 0; i < mysize+2; i++)
    matrix[i][0] = matrix[i][matrix_size+1] = temp[i][0] = temp[i][matrix_size+1] = DIES ;

  /* Initialize the life matrix */
  for (i = 1; i <= mysize; i++)  {
    srand48((long)(1000^(i-1+mysize))) ;  /* note: ^ is bitwise XOR, not exponentiation */
    for (j = 1; j<= matrix_size; j++)
      if (drand48() > 0.5)  
        matrix[i][j] = BORN ;
      else
        matrix[i][j] = DIES ;
  }


  /* Open the graphics display */
  MPE_Open_graphics( &graph, MPI_COMM_WORLD, displayname,
                     -1, -1, width, height, 0 );

  /* Play the game of life for given number of iterations */
  starttime = MPI_Wtime() ;
  for (k = 0; k < ntimes; k++) {
    MPI_Request      req[4];
    MPI_Status       status[4];

    /* Send and receive boundary information */
    MPI_Isend(&matrix[1][0],matrix_size+2,MPI_INT,prev,0,comm,req); 
    MPI_Irecv(&matrix[0][0],matrix_size+2,MPI_INT,prev,0,comm,req+1);
    MPI_Isend(&matrix[mysize][0],matrix_size+2,MPI_INT,next,0,comm,req+2);
    MPI_Irecv(&matrix[mysize+1][0],matrix_size+2,MPI_INT,next,0,comm,req+3);
    MPI_Waitall(4, req, status);

    /* For each element of the matrix ... */ 
    for (i = 1; i <= mysize; i++) {
      for (j = 1; j < matrix_size+1; j++) {

        /* find out the value of the current cell */
        sum = matrix[i-1][j-1] + matrix[i-1][j] + matrix[i-1][j+1] 
          + matrix[i][j-1] + matrix[i][j+1] 
            + matrix[i+1][j-1] + matrix[i+1][j] + matrix[i+1][j+1] ;
        
        /* check if the cell dies or life is born */
        if (sum < 2 || sum > 3)
          temp[i][j] = DIES ;
        else if (sum == 3)
          temp[i][j] = BORN ;
        else
          temp[i][j] = matrix[i][j] ;
        { int xloc, yloc, xwid, ywid;
          xloc = ((my_offset + i - 1) * width) / matrix_size;
          yloc = ((j - 1) * height) / matrix_size;
          xwid = ((my_offset + i) * width) / matrix_size - xloc;
          ywid = (j * height) / matrix_size - yloc;
          MPE_Fill_rectangle( graph, xloc, yloc, xwid, ywid, temp[i][j] );
        }
      }
    }
    MPE_Update( graph );
    /* Swap the matrices */
    addr = matrix ;
    matrix = temp ;
    temp = addr ;
  }

  /* Return the average time taken/processor */
  slavetime = MPI_Wtime() - starttime;
  MPI_Reduce (&slavetime, &totaltime, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
  return (totaltime/(double)size);
}
  

int main(int argc, char *argv[])
{
  int rank, N, iters ;
  double time ;
  
  MPI_Init (&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank) ;

  /* If I'm process 0, determine the matrix size and # of iterations */
  /* This relies on the MPI implementation properly flushing output
     that does not end in a newline.  MPI does not require this, though
     high-quality implementations will do this.
   */
#if !defined (SGI_MPI) && !defined (IBM_MPI)
  if ( rank == 0 ) {
    printf("Matrix Size : ") ;
    scanf("%d",&N) ;
    printf("Iterations : ") ;
    scanf("%d",&iters) ;
  }
#else
  N=20;
  iters=50;
#endif
  
  /* Broadcast the size and # of iterations to all processes */
  MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD) ;
  MPI_Bcast(&iters, 1, MPI_INT, 0, MPI_COMM_WORLD) ;

  if (argc > 2 && strcmp( argv[1], "-display" ) == 0) {
      displayname = malloc( strlen( argv[2] ) + 1 );
      strcpy( displayname, argv[2] );
      }
  /* Call the life routine */
  time = life ( N, iters, MPI_COMM_WORLD );

  /* Print the average time taken per process */
  if (rank == 0)
    printf("[%d] Life finished in %lf seconds\n", rank, time);

  MPE_Close_graphics(&graph);
  MPI_Finalize();
  return 0;
}
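
To try the program one has to link it against the MPE and the X11 libraries. Assuming a typical MPICH/MPE installation, the compilation might look like this (the details vary from one installation to another):

   mpicc -o life_g life_g.c -lmpe -lX11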

The code comprises two distinct parts: the short main program, which is quite easy to decipher, and the somewhat more complicated function life, which does all the work.

So let us begin with main.

After the initial MPI incantations, process number 0 obtains some information from standard input, namely, the size of the matrix and the desired number of iterations:

  if ( rank == 0 ) {
    printf("Matrix Size : ") ;
    scanf("%d",&N) ;
    printf("Iterations : ") ;
    scanf("%d",&iters) ;
  }
This information is then broadcast to other processes:
  MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD) ;
  MPI_Bcast(&iters, 1, MPI_INT, 0, MPI_COMM_WORLD) ;
Every process also analyses the command line (every process gets a copy of argv and argc from MPI_Init) to find out whether the user has specified the -display option, in which case the name of the X11 display is also read from the command line:
  if (argc > 2 && strcmp( argv[1], "-display" ) == 0) {
      displayname = malloc( strlen( argv[2] ) + 1 );
      strcpy( displayname, argv[2] );
      }
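With this option the program might be invoked, for example, as

   mpirun -np 4 life_g -display myhost:0

where the launch command and the display name are, of course, specific to the local installation and given here only for illustration.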
Otherwise the name of the display will be obtained from the MPDs, i.e., from the environment variable DISPLAY at the time MPD was booted on the console node. Here displayname is a global (static) variable, so it doesn't have to be passed to function life. The display itself is going to be opened and then written to by life. Function life is called as follows:
  time = life ( N, iters, MPI_COMM_WORLD );
After the program returns, graphics are closed and MPI itself shuts down:
  MPE_Close_graphics(&graph);
  MPI_Finalize();
}
And this is the end of main.

Function life begins its life by finding the size of the communicator that has been passed to it as an argument. Then every process finds its rank within this communicator:

  MPI_Comm_size(comm, &size) ;
  MPI_Comm_rank(comm, &rank) ;
Now we see some interesting manipulations, whose purpose is very similar to what we did in the diffusion program. But there we simply used powerful MPI functions, whereas here these manipulations are carried out explicitly.

First the processes are organized into a one-dimensional grid, so that each process knows its previous and its next process. The processes at the two ends are given the neighbour MPI_PROC_NULL; a send to, or a receive from, MPI_PROC_NULL completes immediately and does nothing, so the extreme processes will not need any special treatment later in the exchange of boundary data:

  /* Set neighbors */
  if (rank == 0) 
    prev = MPI_PROC_NULL;
  else
    prev = rank-1;
  if (rank == size - 1)
    next = MPI_PROC_NULL;
  else
    next = rank+1;
Now the matrix, which is going to be $N\times N$, is divided amongst the processes (within life the parameter N is called matrix_size):
  mysize = matrix_size/size + ((rank < (matrix_size % size)) ? 1 : 0 ) ;
  my_offset = rank * (matrix_size/size);
  if (rank > (matrix_size % size)) my_offset += (matrix_size % size);
  else                             my_offset += rank;
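For example, with matrix_size = 20 and size = 3 the remainder matrix_size % size is 2, so processes 0 and 1 look after 7 rows of the matrix each, process 2 looks after 6, and my_offset evaluates to 0, 7 and 14 respectively.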
Having decided which portion of the matrix each process is going to look after, we allocate memory for it:
  /* allocate the memory dynamically for the matrix */
  matrix = (int **)malloc(sizeof(int *)*(mysize+2)) ;
  temp = (int **)malloc(sizeof(int *)*(mysize+2)) ;
  for (i = 0; i < mysize+2; i++) {
    matrix[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
    temp[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
  }
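The program, by the way, never frees this memory. Since it is allocated only once and the program exits soon after the computation, no harm is done; but the matching cleanup, if one wanted to add it at the end of life, would look as follows:

  for (i = 0; i < mysize+2; i++) {
    free(matrix[i]);   /* free the rows of both matrices ... */
    free(temp[i]);
  }
  free(matrix);        /* ... then the arrays of row pointers */
  free(temp);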
Now we have the same problem with the ``ghost columns'' (the division of the matrix is one-dimensional in this program) that we had in the diffusion program, and we initialize these regions to zero:
  /* Initialize the boundaries of the life matrix */
  for (j = 0; j < matrix_size+2; j++)
    matrix[0][j] = matrix[mysize+1][j] = temp[0][j] = temp[mysize+1][j] = DIES ;
  for (i = 0; i < mysize+2; i++)
    matrix[i][0] = matrix[i][matrix_size+1] = temp[i][0] = temp[i][matrix_size+1] = DIES ;
The interiors of the matrices that belong to the processes, on the other hand, are initialized to ones and zeros at random:
  /* Initialize the life matrix */
  for (i = 1; i <= mysize; i++)  {
    srand48((long)(1000^(i-1+mysize))) ;  /* note: ^ is bitwise XOR, not exponentiation */
    for (j = 1; j<= matrix_size; j++)
      if (drand48() > 0.5)  
        matrix[i][j] = BORN ;
      else
        matrix[i][j] = DIES ;
  }

Now function life opens the graph on the X11 display:

  /* Open the graphics display */
  MPE_Open_graphics( &graph, MPI_COMM_WORLD, displayname, 
                   -1, -1, width, height, 0 );
where width and height are global variables, both equal to 400. The window is going to be displayed in the top left corner of the X11 display. The call is collective: all processes in the communicator must make it. Because the size of the X11 window is fixed in this program, the matrix, which may be either smaller or larger than $400\times 400$, will have to be either squeezed or stretched in order to be displayed.

Now we begin the iterations. Each iteration begins with the exchange of border columns:

    /* Send and receive boundary information */
    MPI_Isend(&matrix[1][0],matrix_size+2,MPI_INT,prev,0,comm,req); 
    MPI_Irecv(&matrix[0][0],matrix_size+2,MPI_INT,prev,0,comm,req+1);
    MPI_Isend(&matrix[mysize][0],matrix_size+2,MPI_INT,next,0,comm,req+2);
    MPI_Irecv(&matrix[mysize+1][0],matrix_size+2,MPI_INT,next,0,comm,req+3);
    MPI_Waitall(4, req, status);
This is done differently from the example discussed in the section about diffusion. I have mentioned there that MPI_Sendrecv saves us from serialization of the communication. Another way to avoid such serialization is to use non-blocking sends and receives. Observe that we use a new function here, MPI_Waitall, to do the waiting. This function does not wait for just one request to complete; instead it waits until the whole array of requests has completed. In this case the array, req, comprises 4 individual requests. The variable status now points to an array of statuses rather than to an individual status.
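
For comparison, the same exchange could also be written with two MPI_Sendrecv calls, as in the diffusion example. The following is only a sketch, not part of the program; matrix, mysize, matrix_size, prev, next and comm are as in the listing above:

    MPI_Status st;
    /* send my first row up to prev, receive the lower ghost row from next */
    MPI_Sendrecv(&matrix[1][0],        matrix_size+2, MPI_INT, prev, 0,
                 &matrix[mysize+1][0], matrix_size+2, MPI_INT, next, 0,
                 comm, &st);
    /* send my last row down to next, receive the upper ghost row from prev */
    MPI_Sendrecv(&matrix[mysize][0],   matrix_size+2, MPI_INT, next, 1,
                 &matrix[0][0],        matrix_size+2, MPI_INT, prev, 1,
                 comm, &st);

Either way the extreme processes simply communicate with MPI_PROC_NULL, so they need no special-case code.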

Having exchanged the border information, we can now commence the computation itself, which proceeds as I have already explained at the beginning of this section:

    /* For each element of the matrix ... */ 
    for (i = 1; i <= mysize; i++) {
      for (j = 1; j < matrix_size+1; j++) {

        /* find out the value of the current cell */
        sum = matrix[i-1][j-1] + matrix[i-1][j] + matrix[i-1][j+1] 
          + matrix[i][j-1] + matrix[i][j+1] 
            + matrix[i+1][j-1] + matrix[i+1][j] + matrix[i+1][j+1] ;
        
        /* check if the cell dies or life is born */
        if (sum < 2 || sum > 3)
          temp[i][j] = DIES ;
        else if (sum == 3)
          temp[i][j] = BORN ;
        else
          temp[i][j] = matrix[i][j] ;
        { 

          [ ... draw the graph ... ]

          }
      }
    }
For each element of the matrix we evaluate the sum over all its eight neighbours and then set the value of the element to 0 if the sum is less than 2 or greater than 3, set it to 1 if the sum is exactly 3, and leave it unchanged otherwise.

Now we have to display the matrix, either squeezing it or stretching it as required, and then throw the result onto the display:

        { int xloc, yloc, xwid, ywid;
          xloc = ((my_offset + i - 1) * width) / matrix_size;
          yloc = ((j - 1) * height) / matrix_size;
          xwid = ((my_offset + i) * width) / matrix_size - xloc;
          ywid = (j * height) / matrix_size - yloc;
          MPE_Fill_rectangle( graph, xloc, yloc, xwid, ywid, temp[i][j] );
          }
The drawing itself is done by filling the rectangle at the appropriate position with the colour given by the value of the corresponding matrix element, i.e., either zero or one, which translates into white or black.
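
Observe that the integer arithmetic makes the rectangles tile the window exactly, whatever the ratio of width to matrix_size. For example, with matrix_size = 3 and width = 400 the cell boundaries fall at pixels 0, 133, 266 and 400, so the three strips are 133, 133 and 134 pixels wide. When the matrix is larger than the window, some rectangles simply come out with zero width and nothing is drawn for them.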

Having finished with the update, we flush the current picture onto the display by calling

    MPE_Update( graph );
and then the two matrices are swapped:
    /* Swap the matrices */
    addr = matrix ;
    matrix = temp ;
    temp = addr ;
An important point here is that the updates must not be carried out in place on the actual matrix (why?). This is why we have two matrices: the original one and the updated one. At the end of each iteration we swap them around, i.e., the updated one becomes the original one for the next iteration, and the original one becomes the temporary space in which the next update is going to be calculated. This is done without any copying of data; we merely reassign the pointers.

Having finished with all the requested iterations, function life measures the time it spent on the whole operation, sums the per-process times with MPI_Reduce, and returns the average to the calling program:

  /* Return the average time taken/processor */
  slavetime = MPI_Wtime() - starttime;
  MPI_Reduce (&slavetime, &totaltime, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
  return (totaltime/(double)size);
}

