FFT solution 4

C

#include "p3dfft.h"
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

int main(int argc,char **argv)
{
   double *A,*B,*p,*C;
   int i, j, k, x, y, z, nx, ny, nz;
   int proc_id, nproc, dims[2];
   int memsize[3];
   int istart[3], isize[3], iend[3];
   int fstart[3], fsize[3], fend[3];
   int conf, iproc, jproc, ng[3];
   long int Nglob, Ntot;
   double pi, twopi, sinyz;
   double cdiff, ccdiff, ans, prec;
   double *sinx, *siny, *sinz, factor;
   unsigned char op_f[3]="fft", op_b[3]="tff";

   /* MPI Initializzation */
   MPI_Init(&argc,&argv);
   MPI_Comm_size(MPI_COMM_WORLD,&nproc);
   MPI_Comm_rank(MPI_COMM_WORLD,&proc_id);

   /* Dimension definition */ 
   nx=ny=nz=128; 

   /* Dims create */
   dims[0]=dims[1]=0;
   MPI_Dims_create(nproc,2,dims);
   if(dims[0] > dims[1]) {
     dims[0] = dims[1];
     dims[1] = nproc/dims[0];
    }
   /* Other initializzations */
   pi = atan(1.0)*4.0;
   twopi = 2.0*pi;
   /* Initialize P3DFFT */
   p3dfft_setup(dims,nx,ny,nz,1,memsize);

   MPI_Barrier(MPI_COMM_WORLD);

   if(proc_id == 0) printf("Calling get_dims 1\n");
   conf = 1;
   /* Get dimensions for input array - real numbers, X-pencil shape.
      Note that we are following the Fortran ordering, i.e. 
      the dimension  with stride-1 is X. */
   p3dfft_get_dims(istart,iend,isize,conf);

   if(proc_id == 0) printf("Calling get_dims 2\n");
   conf = 2;
   /* Get dimensions for output array - complex numbers, Z-pencil shape.
      Stride-1 dimension could be X or Z, depending on how the library 
      was compiled (stride1 option) */
   p3dfft_get_dims(fstart,fend,fsize,conf);

   if(proc_id == 0) printf("nx = %d;  ny = %d;   nz = %d\n", nx, ny, nz);

   /* Allocatring */
   if(proc_id == 0) printf("Allocating A\n");
   A = (double *) malloc(sizeof(double) * isize[0]*isize[1]*isize[2]);
   if(proc_id == 0) printf("Allocating B\n");
   B = (double *) malloc(sizeof(double) * fsize[0]*fsize[1]*fsize[2]*2);
   if(proc_id == 0) printf("Allocating C\n");
   C = (double *) malloc(sizeof(double) * isize[0]*isize[1]*isize[2]);

   if(A == NULL) 
     printf("%d: Error allocating array A (%d)\n",proc_id,isize[0]*isize[1]*isize[2]);
   if(B == NULL) 
     printf("%d: Error allocating array B (%d)\n",proc_id,fsize[0]*fsize[1]*fsize[2]*2);
   if(C == NULL) 
     printf("%d: Error allocating array C (%d)\n",proc_id,isize[0]*isize[1]*isize[2]);

   sinx = malloc(sizeof(double)*nx);
   siny = malloc(sizeof(double)*ny);
   sinz = malloc(sizeof(double)*nz);
   /* Initial Solution */
   if(proc_id == 0) printf("Initial solution Step 1 of 2\n");
   for(z=0;z < isize[2];z++){
     sinz[z] = sin((z+istart[2]-1)*twopi/nz);
   }
   for(y=0;y < isize[1];y++){
     siny[y] = sin((y+istart[1]-1)*twopi/ny);
   }
   for(x=0;x < isize[0];x++){
     sinx[x] = sin((x+istart[0]-1)*twopi/nx);
   }
   if(proc_id == 0) printf("Initial solution Step 2 of 2\n");
   p = A;
   for(z=0;z < isize[2];z++){
     for(y=0;y < isize[1];y++) {
       sinyz = siny[y]*sinz[z];
       for(x=0;x < isize[0];x++)
          *p++ = sinx[x]*sinyz;
     }
   }
   if(proc_id == 0) printf("Create Normalizzation Factor\n");
   Ntot = fsize[0]*fsize[1];
   Ntot *= fsize[2]*2;
   Nglob = nx * ny;
   Nglob *= nz;
   factor = 1.0/Nglob;

   MPI_Barrier(MPI_COMM_WORLD);
   if(proc_id == 0) printf("Real to complex DFT upward\n");
   p3dfft_ftran_r2c(A,B,op_f);

   MPI_Barrier(MPI_COMM_WORLD);
   if(proc_id == 0) printf("Normalizzation\n");
   /* Normalizzation */
   for(i=0;i < Ntot;i++){
     B[i] *= factor;
   }

   MPI_Barrier(MPI_COMM_WORLD);
   if(proc_id == 0) printf("Real to complex DFT backward\n");
   p3dfft_btran_c2r(B,C,op_b);

   MPI_Barrier(MPI_COMM_WORLD);
   if(proc_id == 0) printf("P3DFFT cleanup\n");
   p3dfft_clean();
  
   if(proc_id == 0) printf("Check Results\n");
   /* Check results */
   cdiff = 0.0; p = C;
   for(z=0;z < isize[2];z++){
     for(y=0;y < isize[1];y++)  {
        sinyz =siny[y]*sinz[z];
        for(x=0;x < isize[0];x++) {
           ans = sinx[x]*sinyz;
           if(cdiff < fabs(*p - ans))
            cdiff = fabs(*p - ans);
	    p++;
         }
     }
   }
   MPI_Reduce(&cdiff,&ccdiff,1,MPI_DOUBLE,MPI_MAX,0,MPI_COMM_WORLD);

  if(proc_id == 0) {
    prec = 1.0e-7;

    if(ccdiff > prec * Nglob*0.25)
      printf("Results are incorrect\n");
    else
      printf("Results are correct\n");

    printf("max diff =%g\n",ccdiff);
  }

   if(proc_id == 0) printf("MPI Finalize\n");
  MPI_Finalize();

}

   FORTRAN

      PROGRAM FFT_3D_2Decomp_MPI
      use mpi
      use, intrinsic :: iso_c_binding
      use decomp_2d
      use decomp_2d_fft
      implicit none
      integer, parameter :: L = 128
      integer, parameter :: M = 128
      integer, parameter :: N = 128
      real, parameter :: prec = 1.0e-7
      integer :: p_row, p_col
      integer :: nx, ny, nz
      integer :: nglob, nlocin, nlocout
      integer, dimension(2) :: dims
      integer, dimension(3) :: fft_start, fft_end, fft_size
      real :: diff, cdiff, ccdiff, pi, twopi
      real(mytype), allocatable, dimension(:) :: sinx, siny, sinz
      real(mytype), allocatable, dimension(:,:,:) :: in, out2
      complex(mytype), allocatable, dimension(:,:,:) :: out
      real(mytype) :: dummy
      integer :: ierror, i,j,k, numproc, mype
      integer, dimension(3) :: sizex, sizez

! ===== Initialize
       call MPI_INIT(ierror)
       call MPI_Comm_size(MPI_COMM_WORLD, numproc, ierror) 
       call MPI_COMM_RANK(MPI_COMM_WORLD, mype, ierror)

       dims = 0
       call MPI_Dims_create(numproc, 2, dims, ierror)
       if (dims(1) .gt. dims(2)) then
         dims(1) = dims(2)
         dims(2) = numproc/dims(1)
       endif
       p_row = dims(1)
       p_col = dims(2)

       if (mype .eq. 0) write(*,*) 'Initializing 2Decomp&FFT...' 

       call decomp_2d_init(L,M,N,p_row,p_col)
       call decomp_2d_fft_init
       call MPI_Barrier(MPI_COMM_WORLD, ierror)

       if (mype .eq. 0) write(*,*) 'Initialize variables...' 

       pi = atan(1.0)*4.0;
       twopi = 2.0*pi;

       do i =1, 3
          sizex(i) = xend(i) - xstart(i) + 1
          sizez(i) = zend(i) - zstart(i) + 1
       end do

       nglob = L * M * N
       nlocin = sizex(1) * sizex(2) * sizex(3)
       nlocout = sizez(1) * sizez(2) * sizez(3)

       if (mype .eq. 0) write(*,*) 'Allocating array...' 

       call decomp_2d_fft_get_size(fft_start,fft_end,fft_size)
       allocate (in(xstart(1):xend(1),xstart(2):xend(2),xstart(3):xend(3)))
       allocate (out(fft_start(1):fft_end(1), fft_start(2):fft_end(2), fft_start(3):fft_end(3)))
       allocate (out2(xstart(1):xend(1),xstart(2):xend(2),xstart(3):xend(3)))

       allocate (sinx(L))
       allocate (siny(M))
       allocate (sinz(N))

! ===== each processor gets its local portion of global data =====
       if (mype .eq. 0) write(*,*) 'Initializing array...' 
       do i =1, L
         sinx(i) = sin(twopi*real(i)/L)
       end do
       do j =1, M
         siny(j) = sin(twopi*real(j)/M)
       end do
       do k =1, N
         sinz(k) = sin(twopi*real(k)/N)
       end do
       do k=xstart(3),xend(3)
          do j=xstart(2),xend(2)
             dummy = sinz(k) * siny(j)
             do i=xstart(1),xend(1)
                in(i,j,k) = dummy * sinx(i)
             end do
          end do
       end do

! ===== 3D forward FFT =====
       if (mype .eq. 0) write(*,*) 'Forward FFT...' 
       call decomp_2d_fft_3d(in, out)
       call MPI_Barrier(MPI_COMM_WORLD, ierror)

! ===== Normalizzation =====
       if (mype .eq. 0) write(*,*) 'Normalizzation...' 
       do k=fft_start(3),fft_end(3)
          do j=fft_start(2),fft_end(2)
             do i=fft_start(1),fft_end(1)
                out(i,j,k) = out(i,j,k)/nglob
             end do
          end do
       end do
       call MPI_Barrier(MPI_COMM_WORLD, ierror)

! ===== 3D backward =====
       if (mype .eq. 0) write(*,*) 'Backward FFT...' 
       call decomp_2d_fft_3d(out, out2)

! ===== Compare =====
       call MPI_Barrier(MPI_COMM_WORLD, ierror)
       if (mype .eq. 0) write(*,*) 'Comparing...' 
       cdiff =0.
       do k=xstart(3),xend(3)
          do j=xstart(2),xend(2)
             do i=xstart(1),xend(1)
               diff = abs(in(i,j,k) - out2(i,j,k))
               if(cdiff .lt. diff) then
                 cdiff = diff
               endif
             end do
          end do
        end do
        call MPI_Reduce(cdiff, ccdiff, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD, ierror)
        if (mype .eq. 0) then
          write(*,*) 'Cdiff_max =', ccdiff
          if(cdiff .gt. (prec*nglob*0.25)) then
            write(*,*) 'Results are incorrect:'
          else
            write(*,*) 'Results are correct!'
          endif
        endif

! =========== Finalize ===============
       if (mype .eq. 0) write(*,*) 'Finalizzing...' 
        call decomp_2d_fft_finalize
        call decomp_2d_finalize
        deallocate(in, out, out2)
        call MPI_FINALIZE(ierror)

      end

For compilation (on FERMI):

module purge
module load autoload fftw/3.3.2--bgq-xl--1.0 bgq-xl/1.0

# fortran
mpixlf2003_r -O3 -qstrict -qarch=qp -qtune=qp -I$FFTW3_INCLUDE Es_3.f90 -L$FFTW3_LIB -lfftw3_mpi -lfftw3f -lfftw3 -lm -o Es_3

# C
mpixlc_r -O3 -qstrict -qarch=qp -qtune=qp -I$FFTW3_INCLUDE Es_3.c -L$FFTW3_LIB -lfftw3_mpi -lfftw3f -lfftw3 -lm -o Es_3

For compilation (on EURORA):

module purge
module load profile/advanced
module load autoload 2Decomp_fft/1.5.847--openmpi--1.6.4--intel--cs-xe-2013--binary

# fortran
mpif90 -O3 -fpp -I. -I$DECOMP_2D_FFT_INCLUDE -I$FFTW_INC es4.f90 -L$DECOMP_2D_FFT_LIB -L$FFTW_LIB -l2decomp_fft -lfftw3_mpi -lfftw3 -o Es_4

# C
The P3DFFT library is not currently available on EURORA cluster due to some bugs on the library itself. When a new and more stable version of the library will be released we will install it also on EURORA and GALILEO.

Menu utility

Main menu

You are here

FFT solution 4

Menu utility

Search form

Main menu

You are here

FFT solution 4