This program is somewhat more complicated than the previous
example. But it is still quite simple in its use of graphics.
There is one new element here that is related to our
use of MPI_Sendrecv in section 5.2.5.
The computation itself is quite similar to Jacobi iterations. The difference is that here the only values allowed in the matrix are zero and one. The iteration takes a sum over all neighbours of an (i,j) element of the matrix, including the diagonal neighbours too (here there is the first difference), so we have 8 neighbours this time instead of just 4 as was the case in the diffusion problem, and now
Here is the code.
#include <mpi.h>
#define MPE_GRAPHICS
#include "mpe.h"
#include <stdio.h>
static MPE_XGraph graph;
static char *displayname = 0;
extern void srand48();
extern double drand48();
extern char * malloc();
static int width = 400, height = 400;
#define BORN 1
#define DIES 0
/* The Life function */
double life(matrix_size, ntimes, comm)
int matrix_size;
int ntimes;
MPI_Comm comm;
{
int rank, size ;
int next, prev ;
int i, j, k;
int mysize, sum ;
int **matrix, **temp, **addr ;
double slavetime, totaltime, starttime ;
int my_offset;
/* Determine size and my rank in communicator */
MPI_Comm_size(comm, &size) ;
MPI_Comm_rank(comm, &rank) ;
/* Set neighbors */
if (rank == 0)
prev = MPI_PROC_NULL;
else
prev = rank-1;
if (rank == size - 1)
next = MPI_PROC_NULL;
else
next = rank+1;
/* Determine my part of the matrix */
mysize = matrix_size/size + ((rank < (matrix_size % size)) ? 1 : 0 ) ;
my_offset = rank * (matrix_size/size);
if (rank > (matrix_size % size)) my_offset += (matrix_size % size);
else my_offset += rank;
/* allocate the memory dynamically for the matrix */
matrix = (int **)malloc(sizeof(int *)*(mysize+2)) ;
temp = (int **)malloc(sizeof(int *)*(mysize+2)) ;
for (i = 0; i < mysize+2; i++) {
matrix[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
temp[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
}
/* Initialize the boundaries of the life matrix */
for (j = 0; j < matrix_size+2; j++)
matrix[0][j] = matrix[mysize+1][j] = temp[0][j] = temp[mysize+1][j] = DIES ;
for (i = 0; i < mysize+2; i++)
matrix[i][0] = matrix[i][matrix_size+1] = temp[i][0] = temp[i][matrix_size+1] = DIES ;
/* Initialize the life matrix */
for (i = 1; i <= mysize; i++) {
srand48((long)(1000^(i-1+mysize))) ;
for (j = 1; j<= matrix_size; j++)
if (drand48() > 0.5)
matrix[i][j] = BORN ;
else
matrix[i][j] = DIES ;
}
/* Open the graphics display */
MPE_Open_graphics( &graph, MPI_COMM_WORLD, displayname,
-1, -1, width, height, 0 );
/* Play the game of life for given number of iterations */
starttime = MPI_Wtime() ;
for (k = 0; k < ntimes; k++) {
MPI_Request req[4];
MPI_Status status[4];
/* Send and receive boundary information */
MPI_Isend(&matrix[1][0],matrix_size+2,MPI_INT,prev,0,comm,req);
MPI_Irecv(&matrix[0][0],matrix_size+2,MPI_INT,prev,0,comm,req+1);
MPI_Isend(&matrix[mysize][0],matrix_size+2,MPI_INT,next,0,comm,req+2);
MPI_Irecv(&matrix[mysize+1][0],matrix_size+2,MPI_INT,next,0,comm,req+3);
MPI_Waitall(4, req, status);
/* For each element of the matrix ... */
for (i = 1; i <= mysize; i++) {
for (j = 1; j < matrix_size+1; j++) {
/* find out the value of the current cell */
sum = matrix[i-1][j-1] + matrix[i-1][j] + matrix[i-1][j+1]
+ matrix[i][j-1] + matrix[i][j+1]
+ matrix[i+1][j-1] + matrix[i+1][j] + matrix[i+1][j+1] ;
/* check if the cell dies or life is born */
if (sum < 2 || sum > 3)
temp[i][j] = DIES ;
else if (sum == 3)
temp[i][j] = BORN ;
else
temp[i][j] = matrix[i][j] ;
{ int xloc, yloc, xwid, ywid;
xloc = ((my_offset + i - 1) * width) / matrix_size;
yloc = ((j - 1) * height) / matrix_size;
xwid = ((my_offset + i) * width) / matrix_size - xloc;
ywid = (j * height) / matrix_size - yloc;
MPE_Fill_rectangle( graph, xloc, yloc, xwid, ywid, temp[i][j] );
}
}
}
MPE_Update( graph );
/* Swap the matrices */
addr = matrix ;
matrix = temp ;
temp = addr ;
}
/* Return the average time taken/processor */
slavetime = MPI_Wtime() - starttime;
MPI_Reduce (&slavetime, &totaltime, 1, MPI_DOUBLE, MPI_SUM, 0, comm);
return (totaltime/(double)size);}
int main(argc, argv)
int argc ;
char *argv[] ;
{
int rank, N, iters ;
double time ;
MPI_Init (&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank) ;
/* If I'm process 0, determine the matrix size and # of iterations */
/* This relies on the MPI implementation properly flushing output
that does not end in a newline. MPI does not require this, though
high-quality implementations will do this.
*/
#if !defined (SGI_MPI) && !defined (IBM_MPI)
if ( rank == 0 ) {
printf("Matrix Size : ") ;
scanf("%d",&N) ;
printf("Iterations : ") ;
scanf("%d",&iters) ;
}
#else
N=20;
iters=50;
#endif
/* Broadcast the size and # of iterations to all processes */
MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD) ;
MPI_Bcast(&iters, 1, MPI_INT, 0, MPI_COMM_WORLD) ;
if (argc > 2 && strcmp( argv[1], "-display" ) == 0) {
displayname = malloc( strlen( argv[2] ) + 1 );
strcpy( displayname, argv[2] );
}
/* Call the life routine */
time = life ( N, iters, MPI_COMM_WORLD );
/* Print the total time taken */
if (rank == 0)
printf("[%d] Life finished in %lf seconds\n",rank,time/100);
MPE_Close_graphics(&graph);
MPI_Finalize();
}
The code comprises two distinct parts. There is the short main
program that is quite easy to decipher and a little more complicated
function life that does all the work.
So let us begin with main.
After the initial MPI incantations, process number 0 obtains some information from standard input, namely, the size of the matrix and the desired number of iterations:
if ( rank == 0 ) {
printf("Matrix Size : ") ;
scanf("%d",&N) ;
printf("Iterations : ") ;
scanf("%d",&iters) ;
}
This information is then broadcast to other processes:MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD) ; MPI_Bcast(&iters, 1, MPI_INT, 0, MPI_COMM_WORLD) ;Every process also analyses the command line (every process gets a copy of
argv and argc from MPI_Init)
to find if the user has specified the -display option, in which
case the name of the X11 display is also read from the command line:
if (argc > 2 && strcmp( argv[1], "-display" ) == 0) {
displayname = malloc( strlen( argv[2] ) + 1 );
strcpy( displayname, argv[2] );
}
Otherwise the name of the display will be obtained from MPDs, i.e.,
from the environmental variable DISPLAY at the time MPD was
booted on the console node. Here displayname is a global (static)
variable, so it doesn't have to be passed to function life.
The display itself is going to be opened and then written to
by life. Function life is called as follows:time = life ( N, iters, MPI_COMM_WORLD );After the program returns, graphics are closed and MPI itself shuts down:
MPE_Close_graphics(&graph); MPI_Finalize(); }And this is the end of
main.
Function life begins its life from finding about the size
of the communicator that's been passed to it as a variable. Then every
process finds about its rank within this communicator:
MPI_Comm_size(comm, &size) ; MPI_Comm_rank(comm, &rank) ;Now we can see some interesting manipulations, whose purpose is very similar to what we did in the diffusion program. But there we simply used powerful MPI functions, where here these manipulations are carried out explicitly.
First the processes are organized into one dimensional grid, so that
each process knows about its previous process and its next process:
/* Set neighbors */
if (rank == 0)
prev = MPI_PROC_NULL;
else
prev = rank-1;
if (rank == size - 1)
next = MPI_PROC_NULL;
else
next = rank+1;
Now the matrix, which is going to be life the parameter N
is called matrix_size):mysize = matrix_size/size + ((rank < (matrix_size % size)) ? 1 : 0 ) ; my_offset = rank * (matrix_size/size); if (rank > (matrix_size % size)) my_offset += (matrix_size % size); else my_offset += rank;Having decided about the portion of the matrix each process is going to look after, we allocate memory for it:
/* allocate the memory dynamically for the matrix */
matrix = (int **)malloc(sizeof(int *)*(mysize+2)) ;
temp = (int **)malloc(sizeof(int *)*(mysize+2)) ;
for (i = 0; i < mysize+2; i++) {
matrix[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
temp[i] = (int *)malloc(sizeof(int)*(matrix_size+2)) ;
}
Now we have the same problem with the ``ghost columns'' (the division
of the matrix is one dimensional in this program) that we had
in the diffusion program and we initialize these regions to zero:
/* Initialize the boundaries of the life matrix */
for (j = 0; j < matrix_size+2; j++)
matrix[0][j] = matrix[mysize+1][j] = temp[0][j] = temp[mysize+1][j] = DIES ;
for (i = 0; i < mysize+2; i++)
matrix[i][0] = matrix[i][matrix_size+1] = temp[i][0] = temp[i][matrix_size+1
] = DIES ;
The interiors of the matrices that belong to the processes, on the other hand, are initialized to ones and
zeros at random:
/* Initialize the life matrix */
for (i = 1; i <= mysize; i++) {
srand48((long)(1000^(i-1+mysize))) ;
for (j = 1; j<= matrix_size; j++)
if (drand48() > 0.5)
matrix[i][j] = BORN ;
else
matrix[i][j] = DIES ;
}
Now function life opens the graph on the X11 display:
/* Open the graphics display */
MPE_Open_graphics( &graph, MPI_COMM_WORLD, displayname,
-1, -1, width, height, 0 );
where width and height are constants, equal 400 each.
The window is going to be displayed in the top left corner of the
X11 display. The open is not collective. Because the size of the
X11 window is fixed in this program, the matrix, which may be either
smaller or larger than
Now we begin the iterations. Each iteration begins with the exchange of border columns:
/* Send and receive boundary information */
MPI_Isend(&matrix[1][0],matrix_size+2,MPI_INT,prev,0,comm,req);
MPI_Irecv(&matrix[0][0],matrix_size+2,MPI_INT,prev,0,comm,req+1);
MPI_Isend(&matrix[mysize][0],matrix_size+2,MPI_INT,next,0,comm,req+2);
MPI_Irecv(&matrix[mysize+1][0],matrix_size+2,MPI_INT,next,0,comm,req+3);
MPI_Waitall(4, req, status);
This is done differently from the example discussed in section about
diffusion. I have mentioned there that MPI_Sendrecv saves
us from serialization of the communication. Another way to
avoid such serialization is to use non-blocking sends and receives.
Observe that we use a new function here,
MPI_Waitall,
to do the waiting. This function waits not for just one request to
be completed. Instead it waits for the whole array of requests
to be completed. In this case the array, req, comprises
4 individual requests. The variable status points to
the array of statuses rather than to an individual status.
Having exchanged the border information we can now commence the computation itself, which is as I have already explained at the beginning of this section:
/* For each element of the matrix ... */
for (i = 1; i <= mysize; i++) {
for (j = 1; j < matrix_size+1; j++) {
/* find out the value of the current cell */
sum = matrix[i-1][j-1] + matrix[i-1][j] + matrix[i-1][j+1]
+ matrix[i][j-1] + matrix[i][j+1]
+ matrix[i+1][j-1] + matrix[i+1][j] + matrix[i+1][j+1] ;
/* check if the cell dies or life is born */
if (sum < 2 || sum > 3)
temp[i][j] = DIES ;
else if (sum == 3)
temp[i][j] = BORN ;
else
temp[i][j] = matrix[i][j] ;
{
[ ... draw the graph ... ]
}
}
}
For each element of the matrix we evaluate the sum over all
its eight neighbours and then set the value of the element
itself to 1, 0 or leave it unchanged, depending on what the sum is.
Now we have to display the matrix by either squeezing it or stretching, as required, and then throwing the result on the display:
{ int xloc, yloc, xwid, ywid;
xloc = ((my_offset + i - 1) * width) / matrix_size;
yloc = ((j - 1) * height) / matrix_size;
xwid = ((my_offset + i) * width) / matrix_size - xloc;
ywid = (j * height) / matrix_size - yloc;
MPE_Fill_rectangle( graph, xloc, yloc, xwid, ywid, temp[i][j] );
}
The drawing itself is done by filling the rectangle at an appropriate
position with the blot, whose color is given by the value of the corresponding
matrix element, i.e., with either zero or one, which is going to translate
into white and black.
Having finished with the update, the current picture is flushed onto the display by calling
MPE_Update( graph );
and the matrix itself is being updated too:
/* Swap the matrices */
addr = matrix ;
matrix = temp ;
temp = addr ;
An important point here is that the updates must not be carried
out on the actual matrix (why?). This is why we have two matrices here.
The original one and then the updated one. At the end of each iteration
we swap them around, i.e., the updated one becomes the original one
(for the next iteration) and the original one becomes the temporary
space on which the update is going to be calculated. This is done without
any data copying. Instead we merely reassign the pointers.
Having finished with all the requested iterations, function life
estimates time spent on the whole operation and returns it to the
calling program:
/* Return the average time taken/processor */ slavetime = MPI_Wtime() - starttime; MPI_Reduce (&slavetime, &totaltime, 1, MPI_DOUBLE, MPI_SUM, 0, comm); return (totaltime/(double)size);}