For C programmers: a different version of the solution is available here. It involves non-blocking communications instead of the usual blocking ones.
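As a rough idea of what that variant looks like, here is a minimal sketch (not the downloadable code itself) of how the two MPI_Sendrecv calls inside update_boundaries_FLAT of the C listing below could be replaced by non-blocking MPI_Irecv/MPI_Isend pairs completed with a single MPI_Waitall. The function name, the tag values and the local #define of NX are illustrative assumptions; the buffer layout is the one used in the listing.

/* sketch of a non-blocking ghost-cell exchange; same row-major layout as the
 * listing below: NX+2 columns per row, rows 0 and NLY+1 are ghost rows.
 * Function name and tags are illustrative only. */
#include <mpi.h>
#define NX 100
void exchange_ghosts_nonblocking(float *temp, int NLY, int proc_up, int proc_down){
    MPI_Request req[4];
    MPI_Status stat[4];
    /* post the receives for both ghost rows first */
    MPI_Irecv(&temp[((NX+2)*(NLY+1))+1], NX, MPI_FLOAT, proc_up, 0, MPI_COMM_WORLD, &req[0]);
    MPI_Irecv(&temp[1], NX, MPI_FLOAT, proc_down, 1, MPI_COMM_WORLD, &req[1]);
    /* send the first and the last physical rows to the neighbours */
    MPI_Isend(&temp[(NX+2)+1], NX, MPI_FLOAT, proc_down, 0, MPI_COMM_WORLD, &req[2]);
    MPI_Isend(&temp[((NX+2)*NLY)+1], NX, MPI_FLOAT, proc_up, 1, MPI_COMM_WORLD, &req[3]);
    /* the ghost rows may not be read, nor the sent rows overwritten, before this completes */
    MPI_Waitall(4, req, stat);
}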
C
/*
* It evolves the equation:
* u,t + u,x + u,y = 0
* Using a Lax scheme.
* The initial data is a cruddy gaussian.
* Boundaries are flat: copying the value of the neighbour
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <mpi.h>
#define NX 100
#define NY 100
#define LX 2.0
#define LY 2.0
#define sigma 0.1
#define tmax 100
/* conversions from discrete to real coordinates */
float ix2x(int ix){
return ((ix - 1) - (NX-1) / 2.0)*LX/(NX-1);
}
/*
* each processor has a different offset.
* I also pass proc_y as an argument instead of proc_me since it will
* become useful when saving the output files.
*/
float iy2y(int iy, int proc_y, int nprocs){
return ((iy-1) - (NY-1) / 2.0 + proc_y*(NY / nprocs) + (proc_y < NY % nprocs? proc_y:NY % nprocs)) * LY/(NY-1);
}
/* utility function for checking the results: it computes the sum of the array values */
float summa(int nx, int ny, float* val){
float summma=0.0;
int ix,iy;
for(iy=1;iy<=ny;++iy)
for(ix=1;ix<=nx;++ix){
summma+=val[((nx+2)*iy)+ix];
}
return(summma);
}
/*
* initialize the system with a gaussian temperature distribution
*/
int init_transport(float *temp, int NLY, int proc_me,int nprocs){
int ix,iy;
float x,y;
for(iy=1;iy<=NLY;++iy)
for(ix=1;ix<=NX;++ix){
x=ix2x(ix);
y=iy2y(iy, proc_me, nprocs);
temp[((NX+2)*iy)+ix] = tmax*exp((-((x*x)+(y*y)))/(2.0*(sigma*sigma)));
}
return 0;
}
/*
* save the temperature distribution
* the ascii format is suitable for the gnuplot splot function
*/
int save_gnuplot(char *filename, float *temp, int NLY, int proc_me, int nprocs){
float *buf;
int ix, iy, iproc=0;
FILE *fp;
MPI_Status status;
if(proc_me == 0){
buf = (float *) malloc ((NX+2)*(NLY+2)*sizeof(float));
fp = fopen(filename, "w");
for(iy=1;iy<=NLY;++iy){
for(ix=1;ix<=NX;++ix){
fprintf(fp, "\t%f\t%f\t%g\n", ix2x(ix), iy2y(iy, proc_me, nprocs), temp[((NX+2)*iy)+ix]);
}
fprintf(fp, "\n");
}
for(iproc=1; iproc<nprocs; iproc++){
int nly, count;
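/* receive the whole slab owned by iproc: the message length,
 * count = (NX+2)*(nly+2) with ghost rows included, reveals its number of rows */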
MPI_Recv(buf, (NX+2)*(NLY+2), MPI_FLOAT, iproc, 0, MPI_COMM_WORLD, &status);
MPI_Get_count( &status, MPI_FLOAT, &count );
nly = count / (NX + 2) - 2;
for(iy=1;iy<=nly;++iy){
for(ix=1;ix<=NX;++ix){
fprintf(fp, "\t%f\t%f\t%g\n", ix2x(ix), iy2y(iy, iproc, nprocs), buf[((NX+2)*iy)+ix]);
}
fprintf(fp, "\n");
}
}
fclose(fp);
free(buf);
}
else{
MPI_Send(temp, (NX+2)*(NLY+2), MPI_FLOAT, 0, 0, MPI_COMM_WORLD);
}
return 0;
}
int update_boundaries_FLAT(float *temp, int NLY, int nprocs, int proc_me, int proc_up, int proc_down){
MPI_Status status, status1;
int iy=0, ix=0;
for(iy=1;iy<=NLY;++iy){
temp[(NX+2)*iy] = temp[((NX+2)*iy)+1];
temp[((NX+2)*iy)+(NX+1)] = temp[((NX+2)*iy)+NX];
}
/*
* only the lowest has the lower boundary condition
*/
if (proc_me==0) for(ix=0;ix<=NX+1;++ix) temp[ix] = temp[(NX+2)+ix];
/*
* only the highest has the upper boundary condition
*/
if (proc_me==nprocs-1) for(ix=0;ix<=NX+1;++ix) temp[((NX+2)*(NLY+1))+ix] = temp[((NX+2)*(NLY))+ix];
/*
*communicate the ghost-cells
*
* lower-down
*/
MPI_Sendrecv(&temp[(NX+2)+1], NX, MPI_FLOAT, proc_down, 0, &temp[((NX+2)*(NLY+1))+1], NX, MPI_FLOAT, proc_up,
0, MPI_COMM_WORLD, &status);
/*
* higher-up
*/
MPI_Sendrecv(&temp[((NX+2)*(NLY))+1], NX, MPI_FLOAT, proc_up, 0, &temp[1], NX, MPI_FLOAT, proc_down,
0, MPI_COMM_WORLD, &status1);
return 0;
}
int evolve(float dtfact, float *temp, float *temp_new, int NLY){
float dx, dt;
int ix, iy;
float temp0;
dx = 2*LX/NX;
dt = dtfact*dx/sqrt(3.0);
for(iy=1;iy<=NLY;++iy)
for(ix=1;ix<=NX;++ix){
temp0 = temp[((NX+2)*iy)+ix];
temp_new[((NX+2)*iy)+ix] = temp0-0.5*dt*(temp[((NX+2)*(iy+1))+ix]-temp[((NX+2)*(iy-1))+ix]+
temp[((NX+2)*iy)+(ix+1)]-temp[((NX+2)*iy)+(ix-1)])/dx;
}
for(iy=0;iy<NLY+2;++iy)
for(ix=0;ix<NX+2;++ix)
temp[((NX+2)*iy)+ix] = temp_new[((NX+2)*iy)+ix];
return 0;
}
int main(int argc, char* argv[]){
int nprocs, proc_me, proc_up, proc_down;
int i=0, NLY;
float *temp, *temp_new,before,after;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &proc_me);
/* Check if nprocs is compatible with NY */
if ( NY < nprocs ) {
if (proc_me==0) {
printf("\nTrying to run with %d processes, while at most %d (NY) are allowed\n",nprocs,NY);
}
MPI_Finalize();
return 0;
}
/*
* all the communications from/to MPI_PROC_NULL do nothing
*/
proc_down = proc_me-1;
proc_up = proc_me+1;
if(proc_down < 0) proc_down = MPI_PROC_NULL;
if(proc_up == nprocs) proc_up = MPI_PROC_NULL;
NLY = NY/nprocs;
if (proc_me < NY % nprocs)
NLY++;
temp = (float *) malloc ((NX+2)*(NLY+2)*sizeof(float));
temp_new = (float *) malloc ((NX+2)*(NLY+2)*sizeof(float));
init_transport(temp, NLY, proc_me,nprocs);
update_boundaries_FLAT(temp, NLY, nprocs, proc_me, proc_up, proc_down);
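/* diagnostic: global sum of the field before the evolution, to be compared with the value printed after the time loop */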
float tbefore =summa(NX, NLY, temp);
MPI_Reduce(&tbefore, &before, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
if(proc_me==0) {
printf(" sum temp before: %f\n",before);
}
save_gnuplot("transport.dat", temp, NLY, proc_me, nprocs);
for(i=1;i<=500;++i){
evolve(0.1, temp, temp_new, NLY);
update_boundaries_FLAT(temp, NLY, nprocs, proc_me, proc_up, proc_down);
}
save_gnuplot("transport_end.dat", temp, NLY, proc_me, nprocs);
float tafter =summa(NX, NLY, temp);
MPI_Reduce(&tafter, &after, 1, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
if(proc_me==0) {
printf(" sum temp after: %f\n",after);
}
free(temp);
free(temp_new);
MPI_Finalize();
return 0;
}
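Both versions distribute the NY rows of the grid by giving NY/nprocs rows to every process and one extra row to each of the first NY mod nprocs ranks; this is what the NLY computation and the offset inside iy2y implement. The following standalone sketch shows that arithmetic on its own (the helper names rows_of and first_global_row, and the choice of 8 processes, are illustrative assumptions):

/* sketch of the row distribution used by both versions: each rank gets
 * NY/nprocs rows, and the first NY % nprocs ranks get one extra row.
 * Helper names are illustrative, not part of the solution. */
#include <stdio.h>
#define NY 100

static int rows_of(int rank, int nprocs){            /* local number of rows (NLY) */
    return NY / nprocs + (rank < NY % nprocs ? 1 : 0);
}

static int first_global_row(int rank, int nprocs){   /* offset used inside iy2y */
    return rank * (NY / nprocs) + (rank < NY % nprocs ? rank : NY % nprocs);
}

int main(void){
    int nprocs = 8;   /* e.g. the PLX jobscript below runs 8 processes */
    for (int rank = 0; rank < nprocs; ++rank)
        printf("rank %d: %d rows, starting at global row %d\n",
               rank, rows_of(rank, nprocs), first_global_row(rank, nprocs));
    return 0;
}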
Fortran
!
! It evolves the equation:
! u,t + u,x + u,y = 0
! Using a Lax scheme.
! The initial data is a cruddy gaussian.
! Boundaries are flat: copying the value of the neighbour
!
! parallel WITHOUT TOPOLOGIES: distributed by columns
! dynamic allocation: the local size is decided at run time
! the remainder rows (NY mod nprocs) are assigned one each to the lowest ranks
module comms
implicit none
save
include 'mpif.h'
integer :: nprocs
integer :: proc_me,proc_up,proc_down
CONTAINS
subroutine INIT_COMMS
integer ierr
call MPI_COMM_SIZE(MPI_COMM_WORLD,nprocs,ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD,proc_me,ierr)
proc_up = proc_me + 1
proc_down = proc_me - 1
! all the communications from/to MPI_PROC_NULL do nothing
if (proc_down < 0) proc_down=MPI_PROC_NULL
if (proc_up >= nprocs) proc_up=MPI_PROC_NULL
end subroutine INIT_COMMS
end module comms
module transport
use comms
implicit none
save
integer, parameter :: NX=100
integer, parameter :: NY=100
real, parameter :: LX=2.0
real, parameter :: LY=2.0
integer :: NLY
real,allocatable :: temp(:,:)
real,allocatable :: temp_new(:,:)
CONTAINS
!conversions from discrete to real coordinates
real function ix2x(ix)
integer ix
ix2x = ((ix-1)-(NX-1)/2.0)*LX/(NX-1)
end function ix2x
! THE FOLLOWING CHANGES: every processor has a different offset.
! I also pass proc_y as an argument instead of proc_me since it will
! become useful when saving the output files.
real function iy2y(iy,proc_y)
integer proc_y
integer iy
if (proc_y < mod(NY,nprocs)) then
iy2y = (iy-1 - (NY-1)/2.0 + proc_y*(NY/nprocs) + proc_y) * LY/(NY-1)
else
iy2y = (iy-1 - (NY-1)/2.0 + proc_y*(NY/nprocs) + mod(NY,nprocs)) * LY/(NY-1)
endif
end function iy2y
! initialize the system with a gaussian temperature distribution
! in parallel, allocate the system too
subroutine init_transport
use comms
integer ix,iy
real x,y
real,parameter :: sigma = 0.1
real,parameter :: tmax = 100
NLY = NY/nprocs
if(proc_me < mod(NY,nprocs)) NLY=NLY+1
allocate ( temp(0:NX+1 , 0:NLY+1) )
allocate ( temp_new(0:NX+1 , 0:NLY+1) )
!! DO loops on local indices only
do iy=1,NLY
do ix=1,NX
x=ix2x(ix)
y=iy2y(iy,proc_me)
temp(ix,iy) = tmax*exp(-(x**2+y**2)/(2.0*sigma**2))
enddo
enddo
end subroutine init_transport
! save the temperature distribution
! the ascii format is suitable for the gnuplot splot function
! collective, use temp_new as temporary buffer
subroutine save_gnuplot(filename)
character(len=*) filename
integer ix,iy
integer iproc
integer count, nly_recv
integer ierr
integer status(MPI_STATUS_SIZE)
if (proc_me == 0) then
open(unit=20,file=filename,form='formatted')
do iy=1,NLY
do ix=1,NX
write(20,*) ix2x(ix),iy2y(iy,0),temp(ix,iy)
enddo
write(20,*)
enddo
do iproc=1,nprocs-1
call MPI_RECV(temp_new,(NX+2)*(NLY+2),MPI_REAL,iproc,0, &
MPI_COMM_WORLD,status,ierr)
! the actual length of the message tells us how many rows iproc owns
call MPI_GET_COUNT(status,MPI_REAL,count,ierr)
nly_recv = count/(NX+2) - 2
do iy=1,nly_recv
do ix=1,NX
write(20,*) ix2x(ix),iy2y(iy,iproc),temp_new(ix,iy)
enddo
write(20,*)
enddo
enddo
close(20)
else
call MPI_SEND(temp,(NX+2)*(NLY+2),MPI_REAL,0,0,MPI_COMM_WORLD,ierr)
endif
end subroutine save_gnuplot
!! NY => NLY
subroutine update_boundaries_FLAT
integer status(MPI_STATUS_SIZE)
integer ierr
temp(0 , 1:NLY) = temp(1 , 1:NLY)
temp(NX+1, 1:NLY) = temp(NX , 1:NLY)
!! only the lowest has the lower boundary condition
if (proc_me==0) temp(1:NX , 0) = temp(1:NX , 1)
!! only the highest has the upper boundary condition
if (proc_me==nprocs-1) temp(1:NX , NLY+1) = temp(1:NX , NLY)
!! communicate the ghost-cells
!! lower-down
call MPI_SENDRECV(temp(1,1),NX,MPI_REAL,proc_down,0, &
temp(1,NLY+1),NX,MPI_REAL,proc_up,0,MPI_COMM_WORLD,status,ierr)
!! higher-up
call MPI_SENDRECV(temp(1,NLY),NX,MPI_REAL,proc_up,0, &
temp(1,0),NX,MPI_REAL,proc_down,0,MPI_COMM_WORLD,status,ierr)
end subroutine update_boundaries_FLAT
subroutine evolve(dtfact)
real dtfact
real dx,dt
integer ix,iy
real temp0
dx = 2*LX/NX
dt = dtfact*dx/sqrt(3.0)
do iy=1,NLY
do ix=1,NX
temp0 = temp(ix,iy)
temp_new(ix,iy) = temp0 - 0.5 * dt * &
(temp(ix+1,iy)-temp(ix-1,iy)+temp(ix,iy+1)-temp(ix,iy-1)) / dx
enddo
enddo
temp = temp_new
end subroutine evolve
end module transport
program prova
use transport
use comms
implicit none
integer i
integer ierr
real before,tbefore,after,tafter
call MPI_INIT(ierr)
call init_comms
call init_transport
call update_boundaries_FLAT
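! diagnostic: local sum over the physical (non-ghost) cells, reduced onto rank 0 below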
tbefore=sum(temp(1:NX,1:NLY))
call MPI_Reduce(tbefore,before,1,MPI_REAL,MPI_SUM,0,MPI_COMM_WORLD,ierr)
if (proc_me==0) write(*,*) 'sum temp before',before
call save_gnuplot('transport.dat')
do i=1,500
call evolve(0.1)
call update_boundaries_FLAT
enddo
call save_gnuplot('transport_end.dat')
tafter=sum(temp(1:NX,1:NLY))
call MPI_Reduce(tafter,after,1,MPI_REAL,MPI_SUM,0,MPI_COMM_WORLD,ierr)
if (proc_me==0) write(*,*) 'sum temp after',after
call MPI_FINALIZE(ierr)
end program prova
Jobscript template (FERMI)
#!/bin/bash
# @ job_type=bluegene
# @ bg_size=128
# @ wall_clock_limit = 00:10:00
# @ account_no =
# @ class = training
# @ queue
module load bgq-gnu zlib
runjob --np 25 --env-all : ./transport-parallel.x
Jobscript template (PLX)
#!/bin/bash
#PBS -N transpar
#PBS -l select=1:ncpus=8:mpiprocs=8
#PBS -l walltime=0:10:00
#PBS -A
#PBS -q debug
cd $PBS_O_WORKDIR
module load autoload openmpi/1.4.5-mt--intel--11.1--binary
mpirun -n 8 ./transport-parallel.x