
Scattering Data

Whereas in the previous example we divided the dataset into columns, assigning every second column to one of the two processes in the MPI pool, in this example we are going to divide the dataset differently. The dataset is going to be of size $8\times4$, i.e., we'll have 8 rows and 4 columns, and 4 processes in the pool. Each process is going to write its own rank number (plus 1) into a different set of locations in the dataset, so that the final picture is going to look as follows:

\begin{displaymath}
\begin{array}{cccc}
1 & 3 & 1 & 3 \\
2 & 4 & 2 & 4 \\
1 & 3 & 1 & 3 \\
2 & 4 & 2 & 4 \\
1 & 3 & 1 & 3 \\
2 & 4 & 2 & 4 \\
1 & 3 & 1 & 3 \\
2 & 4 & 2 & 4
\end{array}
\end{displaymath}
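
Equivalently (this closed form is my own summary of the picture above, not something the program computes): the entry in row $i$ and column $j$, with both indices counted from zero, is

\begin{displaymath}
1 + (i \bmod 2) + 2\,(j \bmod 2)
\end{displaymath}

so the value written at a given location depends only on the parity of its row and column indices.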

This is called scattering the data, although the data isn't really scattered at random at all: it is written according to a pattern, but in a highly non-contiguous (and therefore inefficient) manner. I can't think of a context where you would need to do something like this, but it's nice to know that you can.

Here is how to compile, link and run the program, and what comes out of it eventually:

gustav@bh1 $ pwd
/N/B/gustav/src/I590/HDF5/mpi-io
gustav@bh1 $ h5cc -o hyperslab_by_pattern hyperslab_by_pattern.c 
gustav@bh1 $ mv hyperslab_by_pattern ~/bin
gustav@bh1 $ cd
gustav@bh1 $ mpdboot
gustav@bh1 $ mpdtrace | wc
     20      20      99
gustav@bh1 $ cd /N/gpfs/gustav
gustav@bh1 $ ls
SDS_col.h5  SDS_row.h5  t_mpio_1wMr  test  tests
gustav@bh1 $ mpiexec -n 4 hyperslab_by_pattern
gustav@bh1 $ ls
SDS_col.h5  SDS_pat.h5  SDS_row.h5  t_mpio_1wMr  test  tests
gustav@bh1 $ h5dump SDS_pat.h5
HDF5 "SDS_pat.h5" {
GROUP "/" {
   DATASET "IntArray" {
      DATATYPE  H5T_STD_I32LE
      DATASPACE  SIMPLE { ( 8, 4 ) / ( 8, 4 ) }
      DATA {
         1, 3, 1, 3,
         2, 4, 2, 4,
         1, 3, 1, 3,
         2, 4, 2, 4,
         1, 3, 1, 3,
         2, 4, 2, 4,
         1, 3, 1, 3,
         2, 4, 2, 4
      }
   }
}
}
gustav@bh1 $ mpdallexit
gustav@bh1 $

The listing of the program, which comes from the NCSA HDF5 Tutorial, is shown below.

/*  
 *  This example writes data to the HDF5 file following some pattern
 *             - | - | ......
 *             * V * V ......
 *             - | - | ......
 *             * V * V ......
 *             ..............
 *  Number of processes is assumed to be 4.
 */
 
#include "hdf5.h"

#define H5FILE_NAME     "SDS_pat.h5"
#define DATASETNAME 	"IntArray" 
#define NX     8                      /* dataset dimensions */
#define NY     4 
#define RANK   2
#define RANK1  1

int
main (int argc, char **argv)
{
    /*
     * HDF5 APIs definitions
     */ 	
    hid_t       file_id, dset_id;         /* file and dataset identifiers */
    hid_t       filespace, memspace;      /* file and memory dataspace identifiers */
    hsize_t     dimsf[2];                 /* dataset dimensions */
    hsize_t     dimsm[1];                 /* dataset dimensions */
    int         *data;                    /* pointer to data buffer to write */
    hsize_t	count[2];	          /* hyperslab selection parameters */
    hsize_t	stride[2];
    hssize_t	offset[2];
    hid_t	plist_id;                 /* property list identifier */
    int         i;
    herr_t	status;

    /*
     * MPI variables
     */
    int mpi_size, mpi_rank;
    MPI_Comm comm  = MPI_COMM_WORLD;
    MPI_Info info  = MPI_INFO_NULL;

    /*
     * Initialize MPI
     */
    MPI_Init(&argc, &argv);
    MPI_Comm_size(comm, &mpi_size);
    MPI_Comm_rank(comm, &mpi_rank);  
    /*
     * Exit if number of processes is not 4. 
     */
    if (mpi_size != 4) {
       printf("This example is set up to use 4 processes exactly\n");
       printf("Quitting...\n");
       return 0;
    }
 
    /* 
     * Set up file access property list with parallel I/O access
     */
     plist_id = H5Pcreate(H5P_FILE_ACCESS);
     H5Pset_fapl_mpio(plist_id, comm, info);

    /*
     * Create a new file collectively and release property list identifier.
     */
    file_id = H5Fcreate(H5FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, plist_id);
    H5Pclose(plist_id);
   

    /*
     * Create the dataspace for the dataset.
     */
    dimsf[0] = NX;
    dimsf[1] = NY;
    dimsm[0] = NX;   
    filespace = H5Screate_simple(RANK, dimsf, NULL); 
    memspace  = H5Screate_simple(RANK1, dimsm, NULL); 

    /*
     * Create the dataset with default properties and close filespace.
     */
    dset_id = H5Dcreate(file_id, DATASETNAME, H5T_NATIVE_INT, filespace,
			H5P_DEFAULT);
    H5Sclose(filespace);

    /* 
     * Each process defines dataset in memory and writes it to the hyperslab
     * in the file.
     */
    count[0] = 4;
    count[1] = 2;
    stride[0] = 2;
    stride[1] = 2;
    if(mpi_rank == 0) {
       offset[0] = 0;
       offset[1] = 0;
    } 
    if(mpi_rank == 1) {
       offset[0] = 1;
       offset[1] = 0;
    }
    if(mpi_rank == 2) {
       offset[0] = 0;
       offset[1] = 1;
    } 
    if(mpi_rank == 3) {
       offset[0] = 1;
       offset[1] = 1;
    } 

    /*
     * Select hyperslab in the file.
     */
    filespace = H5Dget_space(dset_id);
    status = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, stride, count, NULL);

    /*
     * Initialize data buffer 
     */
    data = (int *) malloc(sizeof(int)*dimsm[0]);
    for (i=0; i < (int)dimsm[0]; i++) {
        data[i] = mpi_rank + 1;
    }

    /*
     * Create property list for collective dataset write.
     */
    plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
    
    status = H5Dwrite(dset_id, H5T_NATIVE_INT, memspace, filespace,
		      plist_id, data);
    free(data);

    /*
     * Close/release resources.
     */
    H5Dclose(dset_id);
    H5Sclose(filespace);
    H5Sclose(memspace);
    H5Pclose(plist_id);
    H5Fclose(file_id);
 
    MPI_Finalize();

    return 0;
}

Let us analyze this program now. I'll skip the MPI opening, the abominable error handling, and the creation of the file itself (with the appropriate MPI-related property list), because it's all the same stuff as before, and we're going to stop at the point where the dataspaces are created:

#define NX     8   
#define NY     4 
#define RANK   2
#define RANK1  1
...
    dimsf[0] = NX;
    dimsf[1] = NY;
    dimsm[0] = NX;   
    filespace = H5Screate_simple(RANK, dimsf, NULL); 
    memspace  = H5Screate_simple(RANK1, dimsm, NULL);
So the filespace is going to be $8\times4$ and the memspace is a one-dimensional array of 8 elements. Observe that 8 is also the number of elements each process is going to select in the file ($4\times2$ blocks of a single element each); H5Dwrite requires the memory and file selections to contain the same number of elements. Then we create the dataset in the HDF5 file and discard the filespace:
#define DATASETNAME 	"IntArray" 
...
    dset_id = H5Dcreate(file_id, DATASETNAME, H5T_NATIVE_INT, filespace,
			H5P_DEFAULT);
    H5Sclose(filespace);
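As a small aside, and not something the listed program does, you could reopen the dataset's dataspace at this point and query its extent, just to confirm that the dataset really is $8\times4$:
    hsize_t dims[2], maxdims[2];
    hid_t   space = H5Dget_space(dset_id);
    H5Sget_simple_extent_dims(space, dims, maxdims);
    printf("dataset extent: %lu x %lu\n",
           (unsigned long) dims[0], (unsigned long) dims[1]);
    H5Sclose(space);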
Now we get down to selecting the hyperslab of the dataset. We initialize strides, counts and offsets:
    count[0] = 4;
    count[1] = 2;
    stride[0] = 2;
    stride[1] = 2;
    if(mpi_rank == 0) {
       offset[0] = 0;
       offset[1] = 0;
    } 
    if(mpi_rank == 1) {
       offset[0] = 1;
       offset[1] = 0;
    }
    if(mpi_rank == 2) {
       offset[0] = 0;
       offset[1] = 1;
    } 
    if(mpi_rank == 3) {
       offset[0] = 1;
       offset[1] = 1;
    }
Each process, as you see, is going to write a $4\times2$ pattern of blocks, with the blocks separated by a stride of 2 in each dimension. The block size defaults to 1, because we are going to pass NULL in the block-size slot, so each block is a single array element. The process of rank 0 starts at location (0,0), the process of rank 1 at (1,0), the process of rank 2 at (0,1), and the process of rank 3 at (1,1). With counts, strides and offsets defined, we call H5Sselect_hyperslab (a short sketch after this call enumerates the coordinates each rank ends up selecting):
    filespace = H5Dget_space(dset_id);
    status = H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, stride, count, NULL);
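To convince yourself that these selections really produce the picture at the top of this section, here is a little stand-alone sketch, entirely separate from the program above, which enumerates the (row, column) coordinates each rank selects with these offsets, strides and counts (and a block size of 1):

#include <stdio.h>

int main(void)
{
    /* Offsets copied from the program: rank -> (row, column) start. */
    int offset[4][2] = { {0, 0}, {1, 0}, {0, 1}, {1, 1} };
    int count[2]  = { 4, 2 };   /* 4 blocks down, 2 blocks across */
    int stride[2] = { 2, 2 };   /* every second row and column    */
    int rank, i, j;

    for (rank = 0; rank < 4; rank++) {
        printf("rank %d writes %d at:", rank, rank + 1);
        for (i = 0; i < count[0]; i++)
            for (j = 0; j < count[1]; j++)
                printf(" (%d,%d)",
                       offset[rank][0] + i * stride[0],
                       offset[rank][1] + j * stride[1]);
        printf("\n");
    }
    return 0;
}

Rank 0, for example, comes out as (0,0), (0,2), (2,0), (2,2), (4,0), (4,2), (6,0), (6,2), which is exactly where the 1s sit in the h5dump output above.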
Each process is going to fill its memory buffer simply with its own rank number (plus 1):
    data = (int *) malloc(sizeof(int)*dimsm[0]);
    for (i=0; i < (int)dimsm[0]; i++) {
        data[i] = mpi_rank + 1;
    }
Now we are ready to write the data. We prepare the property list for a collective write, which the MPI-IO driver carries out with MPI_File_write_all (or a close relative) underneath, and write the data itself so that it gets mapped from memspace to filespace:
    plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
    status = H5Dwrite(dset_id, H5T_NATIVE_INT, memspace, filespace,
		      plist_id, data);
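For comparison, and this is an aside of mine rather than anything the program does, the only change needed to let every process write on its own instead would be to request an independent transfer on the same kind of property list:
    plist_id = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_INDEPENDENT);
For a selection as fragmented as this one the collective transfer is normally the better choice, because the MPI-IO layer can then combine the many tiny pieces into larger, better organized requests.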
And this is it. We free the data buffer, then we close the dataset, the filespace, the memory space, the property list and the file itself, and finally the processes meet on MPI_Finalize and exit.


Zdzislaw Meglicki
2004-04-29