diff --git a/.gitignore b/.gitignore index 5af028c..879dde1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ *.o *.mod xcompact3d -*.dat \ No newline at end of file +*.dat +2decomp +TAGS \ No newline at end of file diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..f65303d --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "decomp2d"] + path = decomp2d + url = git@github.com:xcompact3d/2decomp_fft.git diff --git a/2decomp b/2decomp deleted file mode 100755 index 3a867b2..0000000 Binary files a/2decomp and /dev/null differ diff --git a/Makefile b/Makefile index 06a67c9..8f97da6 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,9 @@ # -DDEBG - debuggin xcompact3d.f90 # generate a Git version string GIT_VERSION := $(shell git describe --tag --long --always) +BUILD = -DEFS = -DDOUBLE_PREC -DVERSION=\"$(GIT_VERSION)\" +DEFS ?= -DDOUBLE_PREC LCL = local# local,lad,sdu,archer IVER = 17# 15,16,17,18 @@ -17,74 +18,59 @@ FFT = generic# generic,fftw3,mkl #######CMP settings########### ifeq ($(CMP),intel) -FC = mpiifort -#FFLAGS = -fpp -O3 -xHost -heap-arrays -shared-intel -mcmodel=large -safe-cray-ptr -g -traceback -FFLAGS = -fpp -O3 -xSSE4.2 -axAVX,CORE-AVX-I,CORE-AVX2 -ipo -fp-model fast=2 -mcmodel=large -safe-cray-ptr -I$(MPI_ROOT)/lib -##debuggin test: -check all -check bounds -chintel eck uninit -gen-interfaces -warn interfaces + FC = mpiifort + #FFLAGS = -fpp -O3 -xHost -heap-arrays -shared-intel -mcmodel=large -safe-cray-ptr -g -traceback + FFLAGS = -fpp -O3 -xSSE4.2 -axAVX,CORE-AVX-I,CORE-AVX2 -ipo -fp-model fast=2 -mcmodel=large -safe-cray-ptr -I$(MPI_ROOT)/lib + ##debuggin test: -check all -check bounds -chintel eck uninit -gen-interfaces -warn interfaces else ifeq ($(CMP),gcc) -FC = mpif90 -#FFLAGS = -O3 -funroll-loops -floop-optimize -g -Warray-bounds -fcray-pointer -x f95-cpp-input -FFLAGS = -cpp -O3 -funroll-loops -floop-optimize -g -Warray-bounds -fcray-pointer -fbacktrace -ffree-line-length-none -#-ffpe-trap=invalid,zero + FC = mpif90 + FFLAGS = -cpp + ifeq ($(BUILD), debug) + FFLAGS += -g3 -Og + FFLAGS += -ffpe-trap=invalid,zero + else + FFLAGS += -g -O3 + FFLAGS += -funroll-loops -floop-optimize + endif + FFLAGS += -Warray-bounds -fcray-pointer -fbacktrace -ffree-line-length-none else ifeq ($(CMP),nagfor) -FC = mpinagfor -FFLAGS = -fpp + FC = mpinagfor + FFLAGS = -fpp else ifeq ($(CMP),cray) -FC = ftn -FFLAGS = -eF -g -O3 -N 1023 + FC = ftn + FFLAGS = -eF -g -O3 -N 1023 endif - MODDIR = ./mod DECOMPDIR = ./decomp2d SRCDIR = ./src ### List of files for the main code -SRCDECOMP = $(DECOMPDIR)/decomp_2d.f90 $(DECOMPDIR)/glassman.f90 $(DECOMPDIR)/fft_$(FFT).f90 $(DECOMPDIR)/io.f90 -OBJDECOMP = $(SRCDECOMP:%.f90=%.o) -SRC = $(SRCDIR)/module_param.f90 $(SRCDIR)/variables.f90 $(SRCDIR)/poisson.f90 $(SRCDIR)/derive.f90 $(SRCDIR)/schemes.f90 $(SRCDIR)/parameters.f90 #$(SRCDIR)/*.f90 +SRC = $(SRCDIR)/module_param.f90 $(SRCDIR)/variables.f90 $(SRCDIR)/poisson.f90 $(SRCDIR)/derive.f90 $(SRCDIR)/schemes.f90 $(SRCDIR)/parameters.f90 OBJ = $(SRC:%.f90=%.o) SRC = $(SRCDIR)/module_param.f90 $(SRCDIR)/variables.f90 $(SRCDIR)/poisson.f90 $(SRCDIR)/derive.f90 $(SRCDIR)/schemes.f90 $(SRCDIR)/navier.f90 $(SRCDIR)/parameters.f90 $(SRCDIR)/mom.f90 $(SRCDIR)/case.f90 $(SRCDIR)/transeq.f90 $(SRCDIR)/xcompact3d.f90 - -#######FFT settings########## -ifeq ($(FFT),fftw3) - #FFTW3_PATH=/usr - #FFTW3_PATH=/usr/lib64 - FFTW3_PATH=/usr/local/Cellar/fftw/3.3.7_1 - INC=-I$(FFTW3_PATH)/include - LIBFFT=-L$(FFTW3_PATH) -lfftw3 -lfftw3f -else ifeq ($(FFT),fftw3_f03) - FFTW3_PATH=/usr #ubuntu # apt install libfftw3-dev - #FFTW3_PATH=/usr/lib64 #fedora # dnf install fftw fftw-devel - #FFTW3_PATH=/usr/local/Cellar/fftw/3.3.7_1 #macOS # brew install fftw - INC=-I$(FFTW3_PATH)/include - LIBFFT=-L$(FFTW3_PATH)/lib -lfftw3 -lfftw3f -else ifeq ($(FFT),generic) - INC= - LIBFFT= -else ifeq ($(FFT),mkl) - SRCDECOMP := $(DECOMPDIR)/mkl_dfti.f90 $(SRCDECOMP) - LIBFFT=-Wl,--start-group $(MKLROOT)/lib/intel64/libmkl_intel_lp64.a $(MKLROOT)/lib/intel64/libmkl_sequential.a $(MKLROOT)/lib/intel64/libmkl_core.a -Wl,--end-group -lpthread - INC=-I$(MKLROOT)/include -endif - #######OPTIONS settings########### -OPT = -I$(SRCDIR) -I$(DECOMPDIR) $(FFLAGS) +OPT = -I$(SRCDIR) -I$(DECOMP_INCDIR) $(FFLAGS) LINKOPT = $(FFLAGS) #----------------------------------------------------------------------- # Normally no need to change anything below -all: xcompact3d +DECOMP_LIB = 2decomp_fft +DECOMP_LIBDIR = $(DECOMPDIR)/lib +DECOMP_INCDIR = $(DECOMPDIR)/include +DECOMP.A = $(DECOMP_LIBDIR)/lib$(DECOMP_LIB).a +include $(DECOMPDIR)/src/Makefile.inc -xcompact3d : $(OBJDECOMP) $(OBJ) - $(FC) -o $@ $(LINKOPT) $(OBJDECOMP) $(OBJ) $(LIBFFT) +DEFS += -DVERSION=\"$(GIT_VERSION)\" -$(OBJDECOMP):$(DECOMPDIR)%.o : $(DECOMPDIR)%.f90 - $(FC) $(FFLAGS) $(OPT) $(DEFS) $(DEFS2) $(INC) -c $< - mv $(@F) ${DECOMPDIR} - #mv *.mod ${DECOMPDIR} +all: xcompact3d +xcompact3d : $(DECOMP.A) $(OBJ) + $(FC) -o $@ $(LINKOPT) $(OBJ) -L$(DECOMP_LIBDIR) -l$(DECOMP_LIB) $(LIBFFT) + +$(DECOMP.A): + make -C $(DECOMPDIR) F90=$(FC) OPTIONS="$(FFLAGS) $(DEFS) $(DEFS2)" lib $(OBJ):$(SRCDIR)%.o : $(SRCDIR)%.f90 $(FC) $(FFLAGS) $(OPT) $(DEFS) $(DEFS2) $(INC) -c $< @@ -104,10 +90,13 @@ post: clean: - rm -f $(DECOMPDIR)/*.o $(DECOMPDIR)/*.mod rm -f $(SRCDIR)/*.o $(SRCDIR)/*.mod rm -f *.o *.mod xcompact3d post +.PHONY: clean-decomp +clean-decomp: + make -C $(DECOMPDIR) clean + .PHONY: cleanall -cleanall: clean +cleanall: clean clean-decomp rm -f *~ \#*\# out/* data/* stats/* planes/* *.xdmf *.log *.out nodefile core sauve* diff --git a/README.md b/README.md index 827464b..663ca1f 100644 --- a/README.md +++ b/README.md @@ -34,3 +34,32 @@ For benchmarking `$test` mode should be disabled, its intention is to validate c implementations of the compact finite difference scheme solvers. Note this code is a very stripped down version of Xcompact3d, it is intended for profiling only. + +## 2decomp&fft + +Like Xcompact3d, x3div builds upon the 2decomp&fft library - rather than copy the code, we now +have a git submodule tracking our fork [2decomp&fft](https://github.com/xcompact3d/2decomp_fft) +which tracks the upstream repo [2decomp&fft-upstream](https://github.com/numericalalgorithmsgroup/2decomp_fft). +This means we can share code more easily (and benefit from others' contributions). + +For detailed instructions on using git submodules see [git docs](https://git-scm.com/book/en/v2/Git-Tools-Submodules) +however if you are not planning to work on 2decomp&fft itself the following should suffice: + +1) Initial clone of x3div ``git clone https://github.com/xcompact3d/x3div`` +2) Initialise and update the 2decomp&fft submodule ``cd x3div && git submodule init && git submodule update`` + +After which you can continue to build as normal (running ``make`` will first call ``make`` on the 2decomp&fft submodule +and then link the resulting library into x3div). +To ensure you receive the latest changes to 2decomp&fft run ``git submodule update --remote`` periodically, note that you +can also work within the ``decomp2d/`` directory as though it were a standalone git project. + +Note that variables are passed down by ``make``, therefore if you have installed ``ffte`` at +``${FFTE_DIR}`` then you can build against this by running + +`` +make FFT=ffte FFTE_PATH=${FFTE_DIR} +`` + +where ``FFTE_PATH`` is used by 2decomp&fft to link the appropriate library - see +``decomp2d/src/Makefile.inc`` for different FFT library options. +By default ``FFT=generic`` and no external libraries are required. diff --git a/decomp2d b/decomp2d new file mode 160000 index 0000000..aee93b7 --- /dev/null +++ b/decomp2d @@ -0,0 +1 @@ +Subproject commit aee93b7b1dedb35e4fcaf1b985c008426298de1c diff --git a/decomp2d/alloc.inc b/decomp2d/alloc.inc deleted file mode 100644 index c015617..0000000 --- a/decomp2d/alloc.inc +++ /dev/null @@ -1,277 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2012 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! Utility routine to help allocate 3D arrays -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -! X-pencil real arrays -subroutine alloc_x_real(var, opt_decomp, opt_global) - -implicit none - -real(mytype), allocatable, dimension(:,:,:) :: var -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp -logical, intent(IN), optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global -integer :: alloc_stat, errorcode - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -if (present(opt_global)) then -global = opt_global -else -global = .false. -end if - -if (global) then -allocate(var(decomp%xst(1):decomp%xen(1), & -decomp%xst(2):decomp%xen(2), decomp%xst(3):decomp%xen(3)), & -stat=alloc_stat) -else -allocate(var(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)), & -stat=alloc_stat) -end if - -if (alloc_stat /= 0) then -errorcode = 8 -call decomp_2d_abort(errorcode, & -'Memory allocation failed when creating new arrays') -end if - -return -end subroutine alloc_x_real - - -! X-pencil complex arrays -subroutine alloc_x_complex(var, opt_decomp, opt_global) - -implicit none - -complex(mytype), allocatable, dimension(:,:,:) :: var -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp -logical, intent(IN), optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global -integer :: alloc_stat, errorcode - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -if (present(opt_global)) then -global = opt_global -else -global = .false. -end if - -if (global) then -allocate(var(decomp%xst(1):decomp%xen(1), & -decomp%xst(2):decomp%xen(2), decomp%xst(3):decomp%xen(3)), & -stat=alloc_stat) -else -allocate(var(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3)), & -stat=alloc_stat) -end if - -if (alloc_stat /= 0) then -errorcode = 8 -call decomp_2d_abort(errorcode, & -'Memory allocation failed when creating new arrays') -end if - -return -end subroutine alloc_x_complex - - -! Y-pencil real arrays -subroutine alloc_y_real(var, opt_decomp, opt_global) - -implicit none - -real(mytype), allocatable, dimension(:,:,:) :: var -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp -logical, intent(IN), optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global -integer :: alloc_stat, errorcode - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -if (present(opt_global)) then -global = opt_global -else -global = .false. -end if - -if (global) then -allocate(var(decomp%yst(1):decomp%yen(1), & -decomp%yst(2):decomp%yen(2), decomp%yst(3):decomp%yen(3)), & -stat=alloc_stat) -else -allocate(var(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)), & -stat=alloc_stat) -end if - -if (alloc_stat /= 0) then -errorcode = 8 -call decomp_2d_abort(errorcode, & -'Memory allocation failed when creating new arrays') -end if - -return -end subroutine alloc_y_real - - -! Y-pencil complex arrays -subroutine alloc_y_complex(var, opt_decomp, opt_global) - -implicit none - -complex(mytype), allocatable, dimension(:,:,:) :: var -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp -logical, intent(IN), optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global -integer :: alloc_stat, errorcode - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -if (present(opt_global)) then -global = opt_global -else -global = .false. -end if - -if (global) then -allocate(var(decomp%yst(1):decomp%yen(1), & -decomp%yst(2):decomp%yen(2), decomp%yst(3):decomp%yen(3)), & -stat=alloc_stat) -else -allocate(var(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3)), & -stat=alloc_stat) -end if - -if (alloc_stat /= 0) then -errorcode = 8 -call decomp_2d_abort(errorcode, & -'Memory allocation failed when creating new arrays') -end if - -return -end subroutine alloc_y_complex - - -! Z-pencil real arrays -subroutine alloc_z_real(var, opt_decomp, opt_global) - -implicit none - -real(mytype), allocatable, dimension(:,:,:) :: var -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp -logical, intent(IN), optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global -integer :: alloc_stat, errorcode - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -if (present(opt_global)) then -global = opt_global -else -global = .false. -end if - -if (global) then -allocate(var(decomp%zst(1):decomp%zen(1), & -decomp%zst(2):decomp%zen(2), decomp%zst(3):decomp%zen(3)), & -stat=alloc_stat) -else -allocate(var(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)), & -stat=alloc_stat) -end if - -if (alloc_stat /= 0) then -errorcode = 8 -call decomp_2d_abort(errorcode, & -'Memory allocation failed when creating new arrays') -end if - -return -end subroutine alloc_z_real - - -! Z-pencil complex arrays -subroutine alloc_z_complex(var, opt_decomp, opt_global) - -implicit none - -complex(mytype), allocatable, dimension(:,:,:) :: var -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp -logical, intent(IN), optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global -integer :: alloc_stat, errorcode - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -if (present(opt_global)) then -global = opt_global -else -global = .false. -end if - -if (global) then -allocate(var(decomp%zst(1):decomp%zen(1), & -decomp%zst(2):decomp%zen(2), decomp%zst(3):decomp%zen(3)), & -stat=alloc_stat) -else -allocate(var(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3)), & -stat=alloc_stat) -end if - -if (alloc_stat /= 0) then -errorcode = 8 -call decomp_2d_abort(errorcode, & -'Memory allocation failed when creating new arrays') -end if - -return -end subroutine alloc_z_complex diff --git a/decomp2d/decomp_2d.f90 b/decomp2d/decomp_2d.f90 deleted file mode 100644 index 97cb562..0000000 --- a/decomp2d/decomp_2d.f90 +++ /dev/null @@ -1,1719 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2012 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This is the main 2D pencil decomposition module - -module decomp_2d - - use MPI - - implicit none - - private ! Make everything private unless declared public - -#ifdef DOUBLE_PREC - integer, parameter, public :: mytype = KIND(0.0D0) - integer, parameter, public :: real_type = MPI_DOUBLE_PRECISION - integer, parameter, public :: real2_type = MPI_2DOUBLE_PRECISION - integer, parameter, public :: complex_type = MPI_DOUBLE_COMPLEX -#ifdef SAVE_SINGLE - integer, parameter, public :: mytype_single = KIND(0.0) - integer, parameter, public :: real_type_single = MPI_REAL -#else - integer, parameter, public :: mytype_single = KIND(0.0D0) - integer, parameter, public :: real_type_single = MPI_DOUBLE_PRECISION -#endif -#else - integer, parameter, public :: mytype = KIND(0.0) - integer, parameter, public :: real_type = MPI_REAL - integer, parameter, public :: real2_type = MPI_2REAL - integer, parameter, public :: complex_type = MPI_COMPLEX - integer, parameter, public :: mytype_single = KIND(0.0) - integer, parameter, public :: real_type_single = MPI_REAL -#endif - - integer, save, public :: mytype_bytes - - ! some key global variables - integer, save, public :: nx_global, ny_global, nz_global ! global size - - integer, save, public :: nrank ! local MPI rank - integer, save, public :: nproc ! total number of processors - - ! parameters for 2D Cartesian topology - integer, save, dimension(2) :: dims, coord - logical, save, dimension(2) :: periodic - integer, save, public :: DECOMP_2D_COMM_CART_X, & - DECOMP_2D_COMM_CART_Y, DECOMP_2D_COMM_CART_Z - integer, save :: DECOMP_2D_COMM_ROW, DECOMP_2D_COMM_COL - - ! define neighboring blocks (to be used in halo-cell support) - ! first dimension 1=X-pencil, 2=Y-pencil, 3=Z-pencil - ! second dimension 1=east, 2=west, 3=north, 4=south, 5=top, 6=bottom - integer, save, dimension(3,6) :: neighbour - - ! flags for periodic condition in three dimensions - logical, save :: periodic_x, periodic_y, periodic_z - -#ifdef SHM - ! derived type to store shared-memory info - TYPE, public :: SMP_INFO - integer MPI_COMM ! SMP associated with this communicator - integer NODE_ME ! rank in this communicator - integer NCPU ! size of this communicator - integer SMP_COMM ! communicator for SMP-node masters - integer CORE_COMM ! communicator for cores on SMP-node - integer SMP_ME ! SMP-node id starting from 1 ... NSMP - integer NSMP ! number of SMP-nodes in this communicator - integer CORE_ME ! core id starting from 1 ... NCORE - integer NCORE ! number of cores on this SMP-node - integer MAXCORE ! maximum no. cores on any SMP-node - integer N_SND ! size of SMP shared memory buffer - integer N_RCV ! size of SMP shared memory buffer - integer(8) SND_P ! SNDBUF address (cray pointer), for real - integer(8) RCV_P ! RCVBUF address (cray pointer), for real - integer(8) SND_P_c ! for complex - integer(8) RCV_P_c ! for complex - END TYPE SMP_INFO -#endif - - ! derived type to store decomposition info for a given global data size - TYPE, public :: DECOMP_INFO - ! staring/ending index and size of data held by current processor - integer, dimension(3) :: xst, xen, xsz ! x-pencil - integer, dimension(3) :: yst, yen, ysz ! y-pencil - integer, dimension(3) :: zst, zen, zsz ! z-pencil - - ! in addition to local information, processors also need to know - ! some global information for global communications to work - - ! how each dimension is distributed along pencils - integer, allocatable, dimension(:) :: & - x1dist, y1dist, y2dist, z2dist - - ! send/receive buffer counts and displacements for MPI_ALLTOALLV - integer, allocatable, dimension(:) :: & - x1cnts, y1cnts, y2cnts, z2cnts - integer, allocatable, dimension(:) :: & - x1disp, y1disp, y2disp, z2disp - - ! buffer counts for MPI_ALLTOALL: either for evenly distributed data - ! or for padded-alltoall - integer :: x1count, y1count, y2count, z2count - - ! evenly distributed data - logical :: even - -#ifdef SHM - ! For shared-memory implementation - - ! one instance of this derived type for each communicator - ! shared moemory info, such as which MPI rank belongs to which node - TYPE(SMP_INFO) :: ROW_INFO, COL_INFO - - ! shared send/recv buffers for ALLTOALLV - integer, allocatable, dimension(:) :: x1cnts_s, y1cnts_s, & - y2cnts_s, z2cnts_s - integer, allocatable, dimension(:) :: x1disp_s, y1disp_s, & - y2disp_s, z2disp_s - ! A copy of original buffer displacement (will be overwriten) - integer, allocatable, dimension(:) :: x1disp_o, y1disp_o, & - y2disp_o, z2disp_o -#endif - END TYPE DECOMP_INFO - - ! main (default) decomposition information for global size nx*ny*nz - TYPE(DECOMP_INFO), save :: decomp_main - TYPE(DECOMP_INFO), save, public :: phG,ph1,ph2,ph3,ph4 - - ! staring/ending index and size of data held by current processor - ! duplicate 'decomp_main', needed by apps to define data structure - integer, save, dimension(3), public :: xstart, xend, xsize ! x-pencil - integer, save, dimension(3), public :: ystart, yend, ysize ! y-pencil - integer, save, dimension(3), public :: zstart, zend, zsize ! z-pencil - - ! These are the buffers used by MPI_ALLTOALL(V) calls - integer, save :: decomp_buf_size = 0 - real(mytype), allocatable, dimension(:) :: work1_r, work2_r - complex(mytype), allocatable, dimension(:) :: work1_c, work2_c - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! To define smaller arrays using every several mesh points - integer, save, dimension(3), public :: xszS,yszS,zszS,xstS,ystS,zstS,xenS,yenS,zenS - integer, save, dimension(3), public :: xszV,yszV,zszV,xstV,ystV,zstV,xenV,yenV,zenV - integer, save, dimension(3), public :: xszP,yszP,zszP,xstP,ystP,zstP,xenP,yenP,zenP - logical, save :: coarse_mesh_starts_from_1 - integer, save :: iskipS, jskipS, kskipS - integer, save :: iskipV, jskipV, kskipV - integer, save :: iskipP, jskipP, kskipP - - - ! public user routines - public :: decomp_2d_init, decomp_2d_finalize, & - transpose_x_to_y, transpose_y_to_z, & - transpose_z_to_y, transpose_y_to_x, & -#ifdef OCC - transpose_x_to_y_start, transpose_y_to_z_start, & - transpose_z_to_y_start, transpose_y_to_x_start, & - transpose_x_to_y_wait, transpose_y_to_z_wait, & - transpose_z_to_y_wait, transpose_y_to_x_wait, & - transpose_test, & -#endif - decomp_info_init, decomp_info_finalize, partition, & - init_coarser_mesh_statS,fine_to_coarseS,& - init_coarser_mesh_statV,fine_to_coarseV,& - init_coarser_mesh_statP,fine_to_coarseP,& - alloc_x, alloc_y, alloc_z, & - update_halo, decomp_2d_abort, & - get_decomp_info - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! These are routines to perform global data transpositions - ! - ! Four combinations are available, enough to cover all situations - ! - transpose_x_to_y (X-pencil --> Y-pencil) - ! - transpose_y_to_z (Y-pencil --> Z-pencil) - ! - transpose_z_to_y (Z-pencil --> Y-pencil) - ! - transpose_y_to_x (Y-pencil --> X-pencil) - ! - ! Generic interface provided here to support multiple data types - ! - real and complex types supported through generic interface - ! - single/double precision supported through pre-processing - ! * see 'mytype' variable at the beginning - ! - an optional argument can be supplied to transpose data whose - ! global size is not the default nx*ny*nz - ! * as the case in fft r2c/c2r interface -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - interface transpose_x_to_y - module procedure transpose_x_to_y_real - module procedure transpose_x_to_y_complex - end interface transpose_x_to_y - - interface transpose_y_to_z - module procedure transpose_y_to_z_real - module procedure transpose_y_to_z_complex - end interface transpose_y_to_z - - interface transpose_z_to_y - module procedure transpose_z_to_y_real - module procedure transpose_z_to_y_complex - end interface transpose_z_to_y - - interface transpose_y_to_x - module procedure transpose_y_to_x_real - module procedure transpose_y_to_x_complex - end interface transpose_y_to_x - -#ifdef OCC - interface transpose_x_to_y_start - module procedure transpose_x_to_y_real_start - module procedure transpose_x_to_y_complex_start - end interface transpose_x_to_y_start - - interface transpose_y_to_z_start - module procedure transpose_y_to_z_real_start - module procedure transpose_y_to_z_complex_start - end interface transpose_y_to_z_start - - interface transpose_z_to_y_start - module procedure transpose_z_to_y_real_start - module procedure transpose_z_to_y_complex_start - end interface transpose_z_to_y_start - - interface transpose_y_to_x_start - module procedure transpose_y_to_x_real_start - module procedure transpose_y_to_x_complex_start - end interface transpose_y_to_x_start - - interface transpose_x_to_y_wait - module procedure transpose_x_to_y_real_wait - module procedure transpose_x_to_y_complex_wait - end interface transpose_x_to_y_wait - - interface transpose_y_to_z_wait - module procedure transpose_y_to_z_real_wait - module procedure transpose_y_to_z_complex_wait - end interface transpose_y_to_z_wait - - interface transpose_z_to_y_wait - module procedure transpose_z_to_y_real_wait - module procedure transpose_z_to_y_complex_wait - end interface transpose_z_to_y_wait - - interface transpose_y_to_x_wait - module procedure transpose_y_to_x_real_wait - module procedure transpose_y_to_x_complex_wait - end interface transpose_y_to_x_wait -#endif - - interface update_halo - module procedure update_halo_real - module procedure update_halo_complex - end interface update_halo - - interface alloc_x - module procedure alloc_x_real - module procedure alloc_x_complex - end interface alloc_x - - interface alloc_y - module procedure alloc_y_real - module procedure alloc_y_complex - end interface alloc_y - - interface alloc_z - module procedure alloc_z_real - module procedure alloc_z_complex - end interface alloc_z - -contains - -#ifdef SHM_DEBUG -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! For debugging, print the shared-memory structure - subroutine print_smp_info(s) - TYPE(SMP_INFO) :: s - write(10,*) 'size of current communicator:', s%NCPU - write(10,*) 'rank in current communicator:', s%NODE_ME - write(10,*) 'number of SMP-nodes in this communicator:', s%NSMP - write(10,*) 'SMP-node id (1 ~ NSMP):', s%SMP_ME - write(10,*) 'NCORE - number of cores on this SMP-node', s%NCORE - write(10,*) 'core id (1 ~ NCORE):', s%CORE_ME - write(10,*) 'maximum no. cores on any SMP-node:', s%MAXCORE - write(10,*) 'size of SMP shared memory SND buffer:', s%N_SND - write(10,*) 'size of SMP shared memory RCV buffer:', s%N_RCV - end subroutine print_smp_info -#endif - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Routine to be called by applications to initialise this library - ! INPUT: - ! nx, ny, nz - global data dimension - ! p_row, p_col - 2D processor grid - ! OUTPUT: - ! all internal data structures initialised properly - ! library ready to use -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine decomp_2d_init(nx,ny,nz,p_row,p_col,periodic_bc) - - implicit none - - integer, intent(IN) :: nx,ny,nz,p_row,p_col - logical, dimension(3), intent(IN), optional :: periodic_bc - - integer :: errorcode, ierror, row, col - -#ifdef SHM_DEBUG - character(len=80) fname -#endif - - nx_global = nx - ny_global = ny - nz_global = nz - - if (present(periodic_bc)) then - periodic_x = periodic_bc(1) - periodic_y = periodic_bc(2) - periodic_z = periodic_bc(3) - else - periodic_x = .false. - periodic_y = .false. - periodic_z = .false. - end if - - if (p_row==0 .and. p_col==0) then - ! determine the best 2D processor grid - call best_2d_grid(nproc, row, col) - else - if (nproc /= p_row*p_col) then - errorcode = 1 - call decomp_2d_abort(errorcode, & - 'Invalid 2D processor grid - nproc /= p_row*p_col') - else - row = p_row - col = p_col - end if - end if - - ! Create 2D Catersian topology - ! Note that in order to support periodic B.C. in the halo-cell code, - ! need to create multiple topology objects: DECOMP_2D_COMM_CART_?, - ! corresponding to three pencil orientations. They contain almost - ! identical topological information but allow different combinations - ! of periodic conditions. - dims(1) = row - dims(2) = col - periodic(1) = periodic_y - periodic(2) = periodic_z - call MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periodic, & - .false., & ! do not reorder rank - DECOMP_2D_COMM_CART_X, ierror) - periodic(1) = periodic_x - periodic(2) = periodic_z - call MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periodic, & - .false., DECOMP_2D_COMM_CART_Y, ierror) - periodic(1) = periodic_x - periodic(2) = periodic_y - call MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periodic, & - .false., DECOMP_2D_COMM_CART_Z, ierror) - - call MPI_CART_COORDS(DECOMP_2D_COMM_CART_X,nrank,2,coord,ierror) - - ! derive communicators defining sub-groups for ALLTOALL(V) - call MPI_CART_SUB(DECOMP_2D_COMM_CART_X,(/.true.,.false./), & - DECOMP_2D_COMM_COL,ierror) - call MPI_CART_SUB(DECOMP_2D_COMM_CART_X,(/.false.,.true./), & - DECOMP_2D_COMM_ROW,ierror) - - ! gather information for halo-cell support code - call init_neighbour - - ! actually generate all 2D decomposition information - call decomp_info_init(nx,ny,nz,decomp_main) - - ! make a copy of the decomposition information associated with the - ! default global size in these global variables so applications can - ! use them to create data structures - xstart = decomp_main%xst - ystart = decomp_main%yst - zstart = decomp_main%zst - xend = decomp_main%xen - yend = decomp_main%yen - zend = decomp_main%zen - xsize = decomp_main%xsz - ysize = decomp_main%ysz - zsize = decomp_main%zsz - -#ifdef SHM_DEBUG - ! print out shared-memory information - write(fname,99) nrank -99 format('log',I2.2) - open(10,file=fname) - write(10,*)'I am mpi rank ', nrank, 'Total ranks ', nproc - write(10,*)' ' - write(10,*)'Global data size:' - write(10,*)'nx*ny*nz', nx,ny,nz - write(10,*)' ' - write(10,*)'2D processor grid:' - write(10,*)'p_row*p_col:', dims(1), dims(2) - write(10,*)' ' - write(10,*)'Portion of global data held locally:' - write(10,*)'xsize:',xsize - write(10,*)'ysize:',ysize - write(10,*)'zsize:',zsize - write(10,*)' ' - write(10,*)'How pensils are to be divided and sent in alltoallv:' - write(10,*)'x1dist:',decomp_main%x1dist - write(10,*)'y1dist:',decomp_main%y1dist - write(10,*)'y2dist:',decomp_main%y2dist - write(10,*)'z2dist:',decomp_main%z2dist - write(10,*)' ' - write(10,*)'######Shared buffer set up after this point######' - write(10,*)' ' - write(10,*) 'col communicator detais:' - call print_smp_info(decomp_main%COL_INFO) - write(10,*)' ' - write(10,*) 'row communicator detais:' - call print_smp_info(decomp_main%ROW_INFO) - write(10,*)' ' - write(10,*)'Buffer count and displacement of per-core buffers' - write(10,*)'x1cnts:',decomp_main%x1cnts - write(10,*)'y1cnts:',decomp_main%y1cnts - write(10,*)'y2cnts:',decomp_main%y2cnts - write(10,*)'z2cnts:',decomp_main%z2cnts - write(10,*)'x1disp:',decomp_main%x1disp - write(10,*)'y1disp:',decomp_main%y1disp - write(10,*)'y2disp:',decomp_main%y2disp - write(10,*)'z2disp:',decomp_main%z2disp - write(10,*)' ' - write(10,*)'Buffer count and displacement of shared buffers' - write(10,*)'x1cnts:',decomp_main%x1cnts_s - write(10,*)'y1cnts:',decomp_main%y1cnts_s - write(10,*)'y2cnts:',decomp_main%y2cnts_s - write(10,*)'z2cnts:',decomp_main%z2cnts_s - write(10,*)'x1disp:',decomp_main%x1disp_s - write(10,*)'y1disp:',decomp_main%y1disp_s - write(10,*)'y2disp:',decomp_main%y2disp_s - write(10,*)'z2disp:',decomp_main%z2disp_s - write(10,*)' ' - close(10) -#endif - - ! determine the number of bytes per float number - ! do not use 'mytype' which is compiler dependent - ! also possible to use inquire(iolength=...) - call MPI_TYPE_SIZE(real_type,mytype_bytes,ierror) - -#ifdef EVEN - if (nrank==0) write(*,*) 'Padded ALLTOALL optimisation on' -#endif - - return - end subroutine decomp_2d_init - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Routine to be called by applications to clean things up -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine decomp_2d_finalize - - implicit none - - call decomp_info_finalize(decomp_main) - - decomp_buf_size = 0 - deallocate(work1_r, work2_r, work1_c, work2_c) - - return - end subroutine decomp_2d_finalize - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Return the default decomposition object -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine get_decomp_info(decomp) - - implicit none - - TYPE(DECOMP_INFO), intent(OUT) :: decomp - - decomp = decomp_main - - return - end subroutine get_decomp_info - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Advanced Interface allowing applications to define globle domain of - ! any size, distribute it, and then transpose data among pencils. - ! - generate 2D decomposition details as defined in DECOMP_INFO - ! - the default global data size is nx*ny*nz - ! - a different global size nx/2+1,ny,nz is used in FFT r2c/c2r - ! - multiple global sizes can co-exist in one application, each - ! using its own DECOMP_INFO object -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine decomp_info_init(nx,ny,nz,decomp) - - implicit none - - integer, intent(IN) :: nx,ny,nz - TYPE(DECOMP_INFO), intent(INOUT) :: decomp - - integer :: buf_size, status, errorcode - - ! verify the global size can actually be distributed as pencils - if (nx= p_row and ' // & - 'min(ny,nz) >= p_col') - end if - - if (mod(nx,dims(1))==0 .and. mod(ny,dims(1))==0 .and. & - mod(ny,dims(2))==0 .and. mod(nz,dims(2))==0) then - decomp%even = .true. - else - decomp%even = .false. - end if - - ! distribute mesh points - allocate(decomp%x1dist(0:dims(1)-1),decomp%y1dist(0:dims(1)-1), & - decomp%y2dist(0:dims(2)-1),decomp%z2dist(0:dims(2)-1)) - call get_dist(nx,ny,nz,decomp) - - ! generate partition information - starting/ending index etc. - call partition(nx, ny, nz, (/ 1,2,3 /), & - decomp%xst, decomp%xen, decomp%xsz) - call partition(nx, ny, nz, (/ 2,1,3 /), & - decomp%yst, decomp%yen, decomp%ysz) - call partition(nx, ny, nz, (/ 2,3,1 /), & - decomp%zst, decomp%zen, decomp%zsz) - - ! prepare send/receive buffer displacement and count for ALLTOALL(V) - allocate(decomp%x1cnts(0:dims(1)-1),decomp%y1cnts(0:dims(1)-1), & - decomp%y2cnts(0:dims(2)-1),decomp%z2cnts(0:dims(2)-1)) - allocate(decomp%x1disp(0:dims(1)-1),decomp%y1disp(0:dims(1)-1), & - decomp%y2disp(0:dims(2)-1),decomp%z2disp(0:dims(2)-1)) - call prepare_buffer(decomp) - -#ifdef SHM - ! prepare shared-memory information if required - call decomp_info_init_shm(decomp) -#endif - - ! allocate memory for the MPI_ALLTOALL(V) buffers - ! define the buffers globally for performance reason - - buf_size = max(decomp%xsz(1)*decomp%xsz(2)*decomp%xsz(3), & - max(decomp%ysz(1)*decomp%ysz(2)*decomp%ysz(3), & - decomp%zsz(1)*decomp%zsz(2)*decomp%zsz(3)) ) -#ifdef EVEN - ! padded alltoall optimisation may need larger buffer space - buf_size = max(buf_size, & - max(decomp%x1count*dims(1),decomp%y2count*dims(2)) ) -#endif - - ! check if additional memory is required - ! *** TODO: consider how to share the real/complex buffers - if (buf_size > decomp_buf_size) then - decomp_buf_size = buf_size - if (allocated(work1_r)) deallocate(work1_r) - if (allocated(work2_r)) deallocate(work2_r) - if (allocated(work1_c)) deallocate(work1_c) - if (allocated(work2_c)) deallocate(work2_c) - allocate(work1_r(buf_size), STAT=status) - allocate(work2_r(buf_size), STAT=status) - allocate(work1_c(buf_size), STAT=status) - allocate(work2_c(buf_size), STAT=status) - if (status /= 0) then - errorcode = 2 - call decomp_2d_abort(errorcode, & - 'Out of memory when allocating 2DECOMP workspace') - end if - end if - - return - end subroutine decomp_info_init - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Release memory associated with a DECOMP_INFO object -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine decomp_info_finalize(decomp) - - implicit none - - TYPE(DECOMP_INFO), intent(INOUT) :: decomp - - deallocate(decomp%x1dist,decomp%y1dist,decomp%y2dist,decomp%z2dist) - deallocate(decomp%x1cnts,decomp%y1cnts,decomp%y2cnts,decomp%z2cnts) - deallocate(decomp%x1disp,decomp%y1disp,decomp%y2disp,decomp%z2disp) - -#ifdef SHM - deallocate(decomp%x1disp_o,decomp%y1disp_o,decomp%y2disp_o, & - decomp%z2disp_o) - deallocate(decomp%x1cnts_s,decomp%y1cnts_s,decomp%y2cnts_s, & - decomp%z2cnts_s) - deallocate(decomp%x1disp_s,decomp%y1disp_s,decomp%y2disp_s, & - decomp%z2disp_s) -#endif - - return - end subroutine decomp_info_finalize - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Coarser mesh support for statistic -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine init_coarser_mesh_statS(i_skip,j_skip,k_skip,from1) - - implicit none - - integer, intent(IN) :: i_skip,j_skip,k_skip - logical, intent(IN) :: from1 ! .true. - save 1,n+1,2n+1... - ! .false. - save n,2n,3n... - - integer, dimension(3) :: skip - integer :: i - - coarse_mesh_starts_from_1 = from1 - iskipS = i_skip - jskipS = j_skip - kskipS = k_skip - - skip(1)=iskipS - skip(2)=jskipS - skip(3)=kskipS - - do i=1,3 - if (from1) then - xstS(i) = (xstart(i)+skip(i)-1)/skip(i) - if (mod(xstart(i)+skip(i)-1,skip(i))/=0) xstS(i)=xstS(i)+1 - xenS(i) = (xend(i)+skip(i)-1)/skip(i) - else - xstS(i) = xstart(i)/skip(i) - if (mod(xstart(i),skip(i))/=0) xstS(i)=xstS(i)+1 - xenS(i) = xend(i)/skip(i) - end if - xszS(i) = xenS(i)-xstS(i)+1 - end do - - do i=1,3 - if (from1) then - ystS(i) = (ystart(i)+skip(i)-1)/skip(i) - if (mod(ystart(i)+skip(i)-1,skip(i))/=0) ystS(i)=ystS(i)+1 - yenS(i) = (yend(i)+skip(i)-1)/skip(i) - else - ystS(i) = ystart(i)/skip(i) - if (mod(ystart(i),skip(i))/=0) ystS(i)=ystS(i)+1 - yenS(i) = yend(i)/skip(i) - end if - yszS(i) = yenS(i)-ystS(i)+1 - end do - - do i=1,3 - if (from1) then - zstS(i) = (zstart(i)+skip(i)-1)/skip(i) - if (mod(zstart(i)+skip(i)-1,skip(i))/=0) zstS(i)=zstS(i)+1 - zenS(i) = (zend(i)+skip(i)-1)/skip(i) - else - zstS(i) = zstart(i)/skip(i) - if (mod(zstart(i),skip(i))/=0) zstS(i)=zstS(i)+1 - zenS(i) = zend(i)/skip(i) - end if - zszS(i) = zenS(i)-zstS(i)+1 - end do - - return - end subroutine init_coarser_mesh_statS - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Coarser mesh support for visualization -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine init_coarser_mesh_statV(i_skip,j_skip,k_skip,from1) - - implicit none - - integer, intent(IN) :: i_skip,j_skip,k_skip - logical, intent(IN) :: from1 ! .true. - save 1,n+1,2n+1... - ! .false. - save n,2n,3n... - - integer, dimension(3) :: skip - integer :: i - - coarse_mesh_starts_from_1 = from1 - iskipV = i_skip - jskipV = j_skip - kskipV = k_skip - - skip(1)=iskipV - skip(2)=jskipV - skip(3)=kskipV - - do i=1,3 - if (from1) then - xstV(i) = (xstart(i)+skip(i)-1)/skip(i) - if (mod(xstart(i)+skip(i)-1,skip(i))/=0) xstV(i)=xstV(i)+1 - xenV(i) = (xend(i)+skip(i)-1)/skip(i) - else - xstV(i) = xstart(i)/skip(i) - if (mod(xstart(i),skip(i))/=0) xstV(i)=xstV(i)+1 - xenV(i) = xend(i)/skip(i) - end if - xszV(i) = xenV(i)-xstV(i)+1 - end do - - do i=1,3 - if (from1) then - ystV(i) = (ystart(i)+skip(i)-1)/skip(i) - if (mod(ystart(i)+skip(i)-1,skip(i))/=0) ystV(i)=ystV(i)+1 - yenV(i) = (yend(i)+skip(i)-1)/skip(i) - else - ystV(i) = ystart(i)/skip(i) - if (mod(ystart(i),skip(i))/=0) ystV(i)=ystV(i)+1 - yenV(i) = yend(i)/skip(i) - end if - yszV(i) = yenV(i)-ystV(i)+1 - end do - - do i=1,3 - if (from1) then - zstV(i) = (zstart(i)+skip(i)-1)/skip(i) - if (mod(zstart(i)+skip(i)-1,skip(i))/=0) zstV(i)=zstV(i)+1 - zenV(i) = (zend(i)+skip(i)-1)/skip(i) - else - zstV(i) = zstart(i)/skip(i) - if (mod(zstart(i),skip(i))/=0) zstV(i)=zstV(i)+1 - zenV(i) = zend(i)/skip(i) - end if - zszV(i) = zenV(i)-zstV(i)+1 - end do - - return - end subroutine init_coarser_mesh_statV - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Coarser mesh support for probe -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine init_coarser_mesh_statP(i_skip,j_skip,k_skip,from1) - - implicit none - - integer, intent(IN) :: i_skip,j_skip,k_skip - logical, intent(IN) :: from1 ! .true. - save 1,n+1,2n+1... - ! .false. - save n,2n,3n... - - integer, dimension(3) :: skip - integer :: i - - coarse_mesh_starts_from_1 = from1 - iskipP = i_skip - jskipP = j_skip - kskipP = k_skip - - skip(1)=iskipP - skip(2)=jskipP - skip(3)=kskipP - - do i=1,3 - if (from1) then - xstP(i) = (xstart(i)+skip(i)-1)/skip(i) - if (mod(xstart(i)+skip(i)-1,skip(i))/=0) xstP(i)=xstP(i)+1 - xenP(i) = (xend(i)+skip(i)-1)/skip(i) - else - xstP(i) = xstart(i)/skip(i) - if (mod(xstart(i),skip(i))/=0) xstP(i)=xstP(i)+1 - xenP(i) = xend(i)/skip(i) - end if - xszP(i) = xenP(i)-xstP(i)+1 - end do - - do i=1,3 - if (from1) then - ystP(i) = (ystart(i)+skip(i)-1)/skip(i) - if (mod(ystart(i)+skip(i)-1,skip(i))/=0) ystP(i)=ystP(i)+1 - yenP(i) = (yend(i)+skip(i)-1)/skip(i) - else - ystP(i) = ystart(i)/skip(i) - if (mod(ystart(i),skip(i))/=0) ystP(i)=ystP(i)+1 - yenP(i) = yend(i)/skip(i) - end if - yszP(i) = yenP(i)-ystP(i)+1 - end do - - do i=1,3 - if (from1) then - zstP(i) = (zstart(i)+skip(i)-1)/skip(i) - if (mod(zstart(i)+skip(i)-1,skip(i))/=0) zstP(i)=zstP(i)+1 - zenP(i) = (zend(i)+skip(i)-1)/skip(i) - else - zstP(i) = zstart(i)/skip(i) - if (mod(zstart(i),skip(i))/=0) zstP(i)=zstP(i)+1 - zenP(i) = zend(i)/skip(i) - end if - zszP(i) = zenP(i)-zstP(i)+1 - end do - - return - end subroutine init_coarser_mesh_statP - - ! Copy data from a fine-resolution array to a coarse one for statistic - subroutine fine_to_coarseS(ipencil,var_fine,var_coarse) - - implicit none - - real(mytype), dimension(:,:,:) :: var_fine - real(mytype), dimension(:,:,:) :: var_coarse - integer, intent(IN) :: ipencil - - real(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - integer :: i,j,k - - if (ipencil==1) then - allocate(wk(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate(wk2(xstart(1):xend(1),xstart(2):xend(2),xstart(3):xend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=xstS(3),xenS(3) - do j=xstS(2),xenS(2) - do i=xstS(1),xenS(1) - wk(i,j,k) = wk2((i-1)*iskipS+1,(j-1)*jskipS+1,(k-1)*kskipS+1) - end do - end do - end do - else - do k=xstS(3),xenS(3) - do j=xstS(2),xenS(2) - do i=xstS(1),xenS(1) - wk(i,j,k) = wk2(i*iskipS,j*jskipS,k*kskipS) - end do - end do - end do - end if - var_coarse=wk - else if (ipencil==2) then - allocate(wk(ystS(1):yenS(1),ystS(2):yenS(2),ystS(3):yenS(3))) - allocate(wk2(ystart(1):yend(1),ystart(2):yend(2),ystart(3):yend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=ystS(3),yenS(3) - do j=ystS(2),yenS(2) - do i=ystS(1),yenS(1) - wk(i,j,k) = wk2((i-1)*iskipS+1,(j-1)*jskipS+1,(k-1)*kskipS+1) - end do - end do - end do - else - do k=ystS(3),yenS(3) - do j=ystS(2),yenS(2) - do i=ystS(1),yenS(1) - wk(i,j,k) = wk2(i*iskipS,j*jskipS,k*kskipS) - end do - end do - end do - end if - var_coarse=wk - else if (ipencil==3) then - allocate(wk(zstS(1):zenS(1),zstS(2):zenS(2),zstS(3):zenS(3))) - allocate(wk2(zstart(1):zend(1),zstart(2):zend(2),zstart(3):zend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=zstS(3),zenS(3) - do j=zstS(2),zenS(2) - do i=zstS(1),zenS(1) - wk(i,j,k) = wk2((i-1)*iskipS+1,(j-1)*jskipS+1,(k-1)*kskipS+1) - end do - end do - end do - else - do k=zstS(3),zenS(3) - do j=zstS(2),zenS(2) - do i=zstS(1),zenS(1) - wk(i,j,k) = wk2(i*iskipS,j*jskipS,k*kskipS) - end do - end do - end do - end if - var_coarse=wk - end if - - deallocate(wk,wk2) - - return - end subroutine fine_to_coarseS - - ! Copy data from a fine-resolution array to a coarse one for visualization - subroutine fine_to_coarseV(ipencil,var_fine,var_coarse) - - implicit none - - real(mytype), dimension(:,:,:) :: var_fine - real(mytype), dimension(:,:,:) :: var_coarse - integer, intent(IN) :: ipencil - - real(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - integer :: i,j,k - - if (ipencil==1) then - allocate(wk(xstV(1):xenV(1),xstV(2):xenV(2),xstV(3):xenV(3))) - allocate(wk2(xstart(1):xend(1),xstart(2):xend(2),xstart(3):xend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=xstV(3),xenV(3) - do j=xstV(2),xenV(2) - do i=xstV(1),xenV(1) - wk(i,j,k) = wk2((i-1)*iskipV+1,(j-1)*jskipV+1,(k-1)*kskipV+1) - end do - end do - end do - else - do k=xstV(3),xenV(3) - do j=xstV(2),xenV(2) - do i=xstV(1),xenV(1) - wk(i,j,k) = wk2(i*iskipV,j*jskipV,k*kskipV) - end do - end do - end do - end if - var_coarse=wk - else if (ipencil==2) then - allocate(wk(ystV(1):yenV(1),ystV(2):yenV(2),ystV(3):yenV(3))) - allocate(wk2(ystart(1):yend(1),ystart(2):yend(2),ystart(3):yend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=ystV(3),yenV(3) - do j=ystV(2),yenV(2) - do i=ystV(1),yenV(1) - wk(i,j,k) = wk2((i-1)*iskipV+1,(j-1)*jskipV+1,(k-1)*kskipV+1) - end do - end do - end do - else - do k=ystV(3),yenV(3) - do j=ystV(2),yenV(2) - do i=ystV(1),yenV(1) - wk(i,j,k) = wk2(i*iskipV,j*jskipV,k*kskipV) - end do - end do - end do - end if - var_coarse=wk - else if (ipencil==3) then - allocate(wk(zstV(1):zenV(1),zstV(2):zenV(2),zstV(3):zenV(3))) - allocate(wk2(zstart(1):zend(1),zstart(2):zend(2),zstart(3):zend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=zstV(3),zenV(3) - do j=zstV(2),zenV(2) - do i=zstV(1),zenV(1) - wk(i,j,k) = wk2((i-1)*iskipV+1,(j-1)*jskipV+1,(k-1)*kskipV+1) - end do - end do - end do - else - do k=zstV(3),zenV(3) - do j=zstV(2),zenV(2) - do i=zstV(1),zenV(1) - wk(i,j,k) = wk2(i*iskipV,j*jskipV,k*kskipV) - end do - end do - end do - end if - var_coarse=wk - end if - - deallocate(wk,wk2) - - return - end subroutine fine_to_coarseV - - ! Copy data from a fine-resolution array to a coarse one for probe - subroutine fine_to_coarseP(ipencil,var_fine,var_coarse) - - implicit none - - real(mytype), dimension(:,:,:) :: var_fine - real(mytype), dimension(:,:,:) :: var_coarse - integer, intent(IN) :: ipencil - - real(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - integer :: i,j,k - - if (ipencil==1) then - allocate(wk(xstP(1):xenP(1),xstP(2):xenP(2),xstP(3):xenP(3))) - allocate(wk2(xstart(1):xend(1),xstart(2):xend(2),xstart(3):xend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=xstP(3),xenP(3) - do j=xstP(2),xenP(2) - do i=xstP(1),xenP(1) - wk(i,j,k) = wk2((i-1)*iskipP+1,(j-1)*jskipP+1,(k-1)*kskipP+1) - end do - end do - end do - else - do k=xstP(3),xenP(3) - do j=xstP(2),xenP(2) - do i=xstP(1),xenP(1) - wk(i,j,k) = wk2(i*iskipP,j*jskipP,k*kskipP) - end do - end do - end do - end if - var_coarse=wk - else if (ipencil==2) then - allocate(wk(ystP(1):yenP(1),ystP(2):yenP(2),ystP(3):yenP(3))) - allocate(wk2(ystart(1):yend(1),ystart(2):yend(2),ystart(3):yend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=ystP(3),yenP(3) - do j=ystP(2),yenP(2) - do i=ystP(1),yenP(1) - wk(i,j,k) = wk2((i-1)*iskipP+1,(j-1)*jskipP+1,(k-1)*kskipP+1) - end do - end do - end do - else - do k=ystP(3),yenP(3) - do j=ystP(2),yenP(2) - do i=ystP(1),yenP(1) - wk(i,j,k) = wk2(i*iskipP,j*jskipP,k*kskipP) - end do - end do - end do - end if - var_coarse=wk - else if (ipencil==3) then - allocate(wk(zstP(1):zenP(1),zstP(2):zenP(2),zstP(3):zenP(3))) - allocate(wk2(zstart(1):zend(1),zstart(2):zend(2),zstart(3):zend(3))) - wk2=var_fine - if (coarse_mesh_starts_from_1) then - do k=zstP(3),zenP(3) - do j=zstP(2),zenP(2) - do i=zstP(1),zenP(1) - wk(i,j,k) = wk2((i-1)*iskipP+1,(j-1)*jskipP+1,(k-1)*kskipP+1) - end do - end do - end do - else - do k=zstP(3),zenP(3) - do j=zstP(2),zenP(2) - do i=zstP(1),zenP(1) - wk(i,j,k) = wk2(i*iskipP,j*jskipP,k*kskipP) - end do - end do - end do - end if - var_coarse=wk - end if - - deallocate(wk,wk2) - - return - end subroutine fine_to_coarseP - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Find sub-domain information held by current processor - ! INPUT: - ! nx, ny, nz - global data dimension - ! pdim(3) - number of processor grid in each dimension, - ! valid values: 1 - distibute locally; - ! 2 - distribute across p_row; - ! 3 - distribute across p_col - ! OUTPUT: - ! lstart(3) - starting index - ! lend(3) - ending index - ! lsize(3) - size of the sub-block (redundant) -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine partition(nx, ny, nz, pdim, lstart, lend, lsize) - - implicit none - - integer, intent(IN) :: nx, ny, nz - integer, dimension(3), intent(IN) :: pdim - integer, dimension(3), intent(OUT) :: lstart, lend, lsize - - integer, allocatable, dimension(:) :: st,en,sz - integer :: i, gsize - - do i = 1, 3 - - if (i==1) then - gsize = nx - else if (i==2) then - gsize = ny - else if (i==3) then - gsize = nz - end if - - if (pdim(i) == 1) then ! all local - lstart(i) = 1 - lend(i) = gsize - lsize(i) = gsize - elseif (pdim(i) == 2) then ! distribute across dims(1) - allocate(st(0:dims(1)-1)) - allocate(en(0:dims(1)-1)) - allocate(sz(0:dims(1)-1)) - call distribute(gsize,dims(1),st,en,sz) - lstart(i) = st(coord(1)) - lend(i) = en(coord(1)) - lsize(i) = sz(coord(1)) - deallocate(st,en,sz) - elseif (pdim(i) == 3) then ! distribute across dims(2) - allocate(st(0:dims(2)-1)) - allocate(en(0:dims(2)-1)) - allocate(sz(0:dims(2)-1)) - call distribute(gsize,dims(2),st,en,sz) - lstart(i) = st(coord(2)) - lend(i) = en(coord(2)) - lsize(i) = sz(coord(2)) - deallocate(st,en,sz) - end if - - end do - return - - end subroutine partition - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! - distibutes grid points in one dimension - ! - handles uneven distribution properly -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine distribute(data1,proc,st,en,sz) - - implicit none - ! data1 -- data size in any dimension to be partitioned - ! proc -- number of processors in that dimension - ! st -- array of starting index - ! en -- array of ending index - ! sz -- array of local size (redundent) - integer data1,proc,st(0:proc-1),en(0:proc-1),sz(0:proc-1) - integer i,size1,nl,nu - - size1=data1/proc - nu = data1 - size1 * proc - nl = proc - nu - st(0) = 1 - sz(0) = size1 - en(0) = size1 - do i=1,nl-1 - st(i) = st(i-1) + size1 - sz(i) = size1 - en(i) = en(i-1) + size1 - end do - size1 = size1 + 1 - do i=nl,proc-1 - st(i) = en(i-1) + 1 - sz(i) = size1 - en(i) = en(i-1) + size1 - end do - en(proc-1)= data1 - sz(proc-1)= data1-st(proc-1)+1 - - return - end subroutine distribute - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Define how each dimension is distributed across processors - ! e.g. 17 meshes across 4 processor would be distibuted as (4,4,4,5) - ! such global information is required locally at MPI_ALLTOALLV time -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine get_dist(nx,ny,nz,decomp) - - integer, intent(IN) :: nx, ny, nz - TYPE(DECOMP_INFO), intent(INOUT) :: decomp - integer, allocatable, dimension(:) :: st,en - - allocate(st(0:dims(1)-1)) - allocate(en(0:dims(1)-1)) - call distribute(nx,dims(1),st,en,decomp%x1dist) - call distribute(ny,dims(1),st,en,decomp%y1dist) - deallocate(st,en) - - allocate(st(0:dims(2)-1)) - allocate(en(0:dims(2)-1)) - call distribute(ny,dims(2),st,en,decomp%y2dist) - call distribute(nz,dims(2),st,en,decomp%z2dist) - deallocate(st,en) - - return - end subroutine get_dist - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Prepare the send / receive buffers for MPI_ALLTOALLV communications -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine prepare_buffer(decomp) - - implicit none - - TYPE(DECOMP_INFO), intent(INOUT) :: decomp - - integer :: i - - ! MPI_ALLTOALLV buffer information - - do i=0, dims(1)-1 - decomp%x1cnts(i) = decomp%x1dist(i)*decomp%xsz(2)*decomp%xsz(3) - decomp%y1cnts(i) = decomp%ysz(1)*decomp%y1dist(i)*decomp%ysz(3) - if (i==0) then - decomp%x1disp(i) = 0 ! displacement is 0-based index - decomp%y1disp(i) = 0 - else - decomp%x1disp(i) = decomp%x1disp(i-1) + decomp%x1cnts(i-1) - decomp%y1disp(i) = decomp%y1disp(i-1) + decomp%y1cnts(i-1) - end if - end do - - do i=0, dims(2)-1 - decomp%y2cnts(i) = decomp%ysz(1)*decomp%y2dist(i)*decomp%ysz(3) - decomp%z2cnts(i) = decomp%zsz(1)*decomp%zsz(2)*decomp%z2dist(i) - if (i==0) then - decomp%y2disp(i) = 0 ! displacement is 0-based index - decomp%z2disp(i) = 0 - else - decomp%y2disp(i) = decomp%y2disp(i-1) + decomp%y2cnts(i-1) - decomp%z2disp(i) = decomp%z2disp(i-1) + decomp%z2cnts(i-1) - end if - end do - - ! MPI_ALLTOALL buffer information - - ! For evenly distributed data, following is an easier implementation. - ! But it should be covered by the more general formulation below. - !decomp%x1count = decomp%xsz(1)*decomp%xsz(2)*decomp%xsz(3)/dims(1) - !decomp%y1count = decomp%ysz(1)*decomp%ysz(2)*decomp%ysz(3)/dims(1) - !decomp%y2count = decomp%ysz(1)*decomp%ysz(2)*decomp%ysz(3)/dims(2) - !decomp%z2count = decomp%zsz(1)*decomp%zsz(2)*decomp%zsz(3)/dims(2) - - ! For unevenly distributed data, pad smaller messages. Note the - ! last blocks along pencils always get assigned more mesh points - ! for X <=> Y transposes - decomp%x1count = decomp%x1dist(dims(1)-1) * & - decomp%y1dist(dims(1)-1) * decomp%xsz(3) - decomp%y1count = decomp%x1count - ! for Y <=> Z transposes - decomp%y2count = decomp%y2dist(dims(2)-1) * & - decomp%z2dist(dims(2)-1) * decomp%zsz(1) - decomp%z2count = decomp%y2count - - return - end subroutine prepare_buffer - -#ifdef SHM - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Generate shared-memory information -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine decomp_info_init_shm(decomp) - - implicit none - - TYPE(DECOMP_INFO), intent(INOUT) :: decomp - - ! a copy of old displacement array (will be overwritten by shm code) - allocate(decomp%x1disp_o(0:dims(1)-1),decomp%y1disp_o(0:dims(1)-1), & - decomp%y2disp_o(0:dims(2)-1),decomp%z2disp_o(0:dims(2)-1)) - decomp%x1disp_o = decomp%x1disp - decomp%y1disp_o = decomp%y1disp - decomp%y2disp_o = decomp%y2disp - decomp%z2disp_o = decomp%z2disp - - call prepare_shared_buffer(decomp%ROW_INFO,DECOMP_2D_COMM_ROW,decomp) - call prepare_shared_buffer(decomp%COL_INFO,DECOMP_2D_COMM_COL,decomp) - - return - end subroutine decomp_info_init_shm - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! For shared-memory implementation, prepare send/recv shared buffer -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine prepare_shared_buffer(C,MPI_COMM,decomp) - - implicit none - - TYPE(SMP_INFO) :: C - INTEGER :: MPI_COMM - TYPE(DECOMP_INFO) :: decomp - - INTEGER, ALLOCATABLE :: KTBL(:,:),NARY(:,:),KTBLALL(:,:) - INTEGER MYSMP, MYCORE, COLOR - - integer :: ierror - - C%MPI_COMM = MPI_COMM - CALL MPI_COMM_SIZE(MPI_COMM,C%NCPU,ierror) - CALL MPI_COMM_RANK(MPI_COMM,C%NODE_ME,ierror) - C%SMP_COMM = MPI_COMM_NULL - C%CORE_COMM = MPI_COMM_NULL - C%SMP_ME= 0 - C%NCORE = 0 - C%CORE_ME = 0 - C%MAXCORE = 0 - C%NSMP = 0 - C%N_SND = 0 - C%N_RCV = 0 - C%SND_P = 0 - C%RCV_P = 0 - C%SND_P_c = 0 - C%RCV_P_c = 0 - - ! get smp-node map for this communicator and set up smp communicators - CALL GET_SMP_MAP(C%MPI_COMM, C%NSMP, MYSMP, & - C%NCORE, MYCORE, C%MAXCORE) - C%SMP_ME = MYSMP + 1 - C%CORE_ME = MYCORE + 1 - ! - set up inter/intra smp-node communicators - COLOR = MYCORE - IF (COLOR.GT.0) COLOR = MPI_UNDEFINED - CALL MPI_Comm_split(C%MPI_COMM, COLOR, MYSMP, C%SMP_COMM, ierror) - CALL MPI_Comm_split(C%MPI_COMM, MYSMP, MYCORE, C%CORE_COMM, ierror) - ! - allocate work space - ALLOCATE(KTBL(C%MAXCORE,C%NSMP),NARY(C%NCPU,C%NCORE)) - ALLOCATE(KTBLALL(C%MAXCORE,C%NSMP)) - ! - set up smp-node/core to node_me lookup table - KTBL = 0 - KTBL(C%CORE_ME,C%SMP_ME) = C%NODE_ME + 1 - CALL MPI_ALLREDUCE(KTBL,KTBLALL,C%NSMP*C%MAXCORE,MPI_INTEGER, & - MPI_SUM,MPI_COMM,ierror) - KTBL=KTBLALL - ! IF (SUM(KTBL) /= C%NCPU*(C%NCPU+1)/2) & - ! CALL MPI_ABORT(... - - ! compute offsets in shared SNDBUF and RCVBUF - CALL MAPSET_SMPSHM(C, KTBL, NARY, decomp) - - DEALLOCATE(KTBL,NARY) - - return - end subroutine prepare_shared_buffer - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Use Ian Bush's FreeIPC to generate shared-memory information - ! - system independent solution - ! - replacing David Tanqueray's implementation in alloc_shm.c - ! (old C code renamed to get_smp_map2) -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine get_smp_map(comm, nnodes, my_node, ncores, my_core, maxcor) - - use FIPC_module - - implicit none - - integer, intent(IN) :: comm - integer, intent(OUT) :: nnodes, my_node, ncores, my_core, maxcor - - integer :: intra_comm, extra_comm - integer :: ierror - - call FIPC_init(comm, ierror) - - ! intra_comm: communicator for processes on this shared memory node - ! extra_comm: communicator for all rank 0 on each shared memory node - call FIPC_ctxt_intra_comm(FIPC_ctxt_world, intra_comm, ierror) - call FIPC_ctxt_extra_comm(FIPC_ctxt_world, extra_comm, ierror) - - call MPI_COMM_SIZE(intra_comm, ncores, ierror) - call MPI_COMM_RANK(intra_comm, my_core, ierror) - - ! only rank 0 on each shared memory node member of extra_comm - ! for others extra_comm = MPI_COMM_NULL - if (extra_comm /= MPI_COMM_NULL) then - call MPI_COMM_SIZE(extra_comm, nnodes, ierror) - call MPI_COMM_RANK(extra_comm, my_node, ierror) - end if - - ! other ranks share the same information as their leaders - call MPI_BCAST( nnodes, 1, MPI_INTEGER, 0, intra_comm, ierror) - call MPI_BCAST(my_node, 1, MPI_INTEGER, 0, intra_comm, ierror) - - ! maxcor - call MPI_ALLREDUCE(ncores, maxcor, 1, MPI_INTEGER, MPI_MAX, & - MPI_COMM_WORLD, ierror) - - call FIPC_finalize(ierror) - - return - - end subroutine get_smp_map - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Set up smp-node based shared memory maps -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - SUBROUTINE MAPSET_SMPSHM(C, KTBL, NARY, decomp) - - IMPLICIT NONE - - TYPE (SMP_INFO) C - INTEGER KTBL(C%MAXCORE,C%NSMP) - INTEGER NARY(C%NCPU,C%NCORE) - TYPE (DECOMP_INFO) :: decomp - - INTEGER i, j, k, l, N, PTR, BSIZ, ierror, status, seed - character*16 s - - BSIZ = C%N_SND - - ! a - SNDBUF - IF (C%MPI_COMM==DECOMP_2D_COMM_COL) THEN - ALLOCATE(decomp%x1cnts_s(C%NSMP),decomp%x1disp_s(C%NSMP+1), & - stat=status) - CALL MPI_Allgather(decomp%x1cnts, C%NCPU, MPI_INTEGER, & - NARY, C%NCPU, MPI_INTEGER, C%CORE_COMM, ierror) - PTR = 0 - DO i=1,C%NSMP - decomp%x1disp_s(i) = PTR - N = 0 - DO j=1,C%MAXCORE - k = KTBL(j,i) - IF (k > 0) then - DO l=1,C%NCORE - IF (l == C%CORE_ME) decomp%x1disp_o(k-1) = PTR - N = N + NARY(k,l) - PTR = PTR + NARY(k,l) - END DO - END IF - END DO - decomp%x1cnts_s(i) = N - END DO - decomp%x1disp_s(C%NSMP+1) = PTR - IF (PTR > BSIZ) BSIZ = PTR - - ELSE IF (C%MPI_COMM==DECOMP_2D_COMM_ROW) THEN - ALLOCATE(decomp%y2cnts_s(C%NSMP),decomp%y2disp_s(C%NSMP+1), & - stat=status) - CALL MPI_Allgather(decomp%y2cnts, C%NCPU, MPI_INTEGER, & - NARY, C%NCPU, MPI_INTEGER, C%CORE_COMM, ierror) - PTR = 0 - DO i=1,C%NSMP - decomp%y2disp_s(i) = PTR - N = 0 - DO j=1,C%MAXCORE - k = KTBL(j,i) - IF (k > 0) then - DO l=1,C%NCORE - IF (l == C%CORE_ME) decomp%y2disp_o(k-1) = PTR - N = N + NARY(k,l) - PTR = PTR + NARY(k,l) - END DO - END IF - END DO - decomp%y2cnts_s(i) = N - END DO - decomp%y2disp_s(C%NSMP+1) = PTR - IF (PTR > BSIZ) BSIZ = PTR - END IF - - ! b - RCVBUF - - IF (C%MPI_COMM==DECOMP_2D_COMM_COL) THEN - ALLOCATE(decomp%y1cnts_s(C%NSMP),decomp%y1disp_s(C%NSMP+1), & - stat=status) - CALL MPI_Allgather(decomp%y1cnts, C%NCPU, MPI_INTEGER, & - NARY, C%NCPU, MPI_INTEGER, C%CORE_COMM, ierror) - PTR = 0 - DO i=1,C%NSMP - decomp%y1disp_s(i) = PTR - N=0 - DO j=1,C%NCORE - DO l=1,C%MAXCORE - k = KTBL(l,i) - IF (k > 0) then - IF (j == C%CORE_ME) decomp%y1disp_o(k-1) = PTR - N = N + NARY(k,j) - PTR = PTR + NARY(k,j) - END IF - END DO - END DO - decomp%y1cnts_s(i) = N - END DO - decomp%y1disp_s(C%NSMP+1) = PTR - IF (PTR > BSIZ) BSIZ = PTR - - ELSE IF (C%MPI_COMM==DECOMP_2D_COMM_ROW) THEN - ALLOCATE(decomp%z2cnts_s(C%NSMP),decomp%z2disp_s(C%NSMP+1), & - stat=status) - CALL MPI_Allgather(decomp%z2cnts, C%NCPU, MPI_INTEGER, & - NARY, C%NCPU, MPI_INTEGER, C%CORE_COMM, ierror) - PTR = 0 - DO i=1,C%NSMP - decomp%z2disp_s(i) = PTR - N=0 - DO j=1,C%NCORE - DO l=1,C%MAXCORE - k = KTBL(l,i) - IF (k > 0) then - IF (j == C%CORE_ME) decomp%z2disp_o(k-1) = PTR - N = N + NARY(k,j) - PTR = PTR + NARY(k,j) - END IF - END DO - END DO - decomp%z2cnts_s(i) = N - END DO - decomp%z2disp_s(C%NSMP+1) = PTR - IF (PTR > BSIZ) BSIZ = PTR - - END IF - - ! check buffer size and (re)-allocate buffer space if necessary - IF (BSIZ > C%N_SND) then - IF (C%SND_P /= 0) CALL DEALLOC_SHM(C%SND_P, C%CORE_COMM) - ! make sure each rank has unique keys to get shared memory - !IF (C%MPI_COMM==DECOMP_2D_COMM_COL) THEN - ! seed = nrank+nproc*0+1 ! has to be non-zero - !ELSE IF (C%MPI_COMM==DECOMP_2D_COMM_ROW) THEN - ! seed = nrank+nproc*1+1 - !END IF - status = 1 - !CALL ALLOC_SHM(C%SND_P, BSIZ, real_type, C%CORE_COMM, status, & - ! seed) - CALL ALLOC_SHM(C%SND_P, BSIZ, real_type, C%CORE_COMM, status) - C%N_SND = BSIZ - - IF (C%RCV_P /= 0) CALL DEALLOC_SHM(C%RCV_P, C%CORE_COMM) - status = 1 - CALL ALLOC_SHM(C%RCV_P, BSIZ, real_type, C%CORE_COMM, status) - C%N_RCV = BSIZ - - IF (C%SND_P_c /= 0) CALL DEALLOC_SHM(C%SND_P_c, C%CORE_COMM) - status = 1 - CALL ALLOC_SHM(C%SND_P_c, BSIZ, complex_type, C%CORE_COMM, status) - C%N_SND = BSIZ - - IF (C%RCV_P_c /= 0) CALL DEALLOC_SHM(C%RCV_P_c, C%CORE_COMM) - status = 1 - CALL ALLOC_SHM(C%RCV_P_c, BSIZ, complex_type, C%CORE_COMM, status) - C%N_RCV = BSIZ - - - END IF - - RETURN - END SUBROUTINE MAPSET_SMPSHM - -#endif - - -#ifdef OCC - ! For non-blocking communication code, progress the comminication stack - subroutine transpose_test(handle) - - implicit none - - integer :: handle, ierror - - call NBC_TEST(handle,ierror) - - return - end subroutine transpose_test -#endif - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Transposition routines -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -#include "transpose_x_to_y.inc" -#include "transpose_y_to_z.inc" -#include "transpose_z_to_y.inc" -#include "transpose_y_to_x.inc" - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Auto-tuning algorithm to select the best 2D processor grid -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine best_2d_grid(iproc, best_p_row, best_p_col) - - implicit none - - integer, intent(IN) :: iproc - integer, intent(OUT) :: best_p_row, best_p_col - - integer, allocatable, dimension(:) :: factors - double precision :: t1, t2, best_time - integer :: nfact, i, row, col, ierror, errorcode - - real(mytype), allocatable, dimension(:,:,:) :: u1, u2, u3 - - TYPE(DECOMP_INFO) :: decomp - - if (nrank==0) write(*,*) 'In auto-tuning mode......' - - best_time = huge(t1) - best_p_row = -1 - best_p_col = -1 - - i = int(sqrt(real(iproc))) + 10 ! enough space to save all factors - allocate(factors(i)) - call findfactor(iproc, factors, nfact) - if (nrank==0) write(*,*) 'factors: ', (factors(i), i=1,nfact) - - do i=1, nfact - - row = factors(i) - col = iproc / row - - ! enforce the limitation of 2D decomposition - if (min(nx_global,ny_global)>=row .and. & - min(ny_global,nz_global)>=col) then - - ! 2D Catersian topology - dims(1) = row - dims(2) = col - periodic(1) = .false. - periodic(2) = .false. - call MPI_CART_CREATE(MPI_COMM_WORLD,2,dims,periodic, & - .false.,DECOMP_2D_COMM_CART_X, ierror) - call MPI_CART_COORDS(DECOMP_2D_COMM_CART_X,nrank,2,coord,ierror) - - ! communicators defining sub-groups for ALLTOALL(V) - call MPI_CART_SUB(DECOMP_2D_COMM_CART_X,(/.true.,.false./), & - DECOMP_2D_COMM_COL,ierror) - call MPI_CART_SUB(DECOMP_2D_COMM_CART_X,(/.false.,.true./), & - DECOMP_2D_COMM_ROW,ierror) - - ! generate 2D decomposition information for this row*col - call decomp_info_init(nx_global,ny_global,nz_global,decomp) - - ! arrays for X,Y and Z-pencils - allocate(u1(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3))) - allocate(u2(decomp%ysz(1),decomp%ysz(2),decomp%ysz(3))) - allocate(u3(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3))) - - ! timing the transposition routines - t1 = MPI_WTIME() - call transpose_x_to_y(u1,u2,decomp) - call transpose_y_to_z(u2,u3,decomp) - call transpose_z_to_y(u3,u2,decomp) - call transpose_y_to_x(u2,u1,decomp) - t2 = MPI_WTIME() - t1 - - deallocate(u1,u2,u3) - call decomp_info_finalize(decomp) - - call MPI_ALLREDUCE(t2,t1,1,MPI_DOUBLE_PRECISION,MPI_SUM, & - MPI_COMM_WORLD,ierror) - t1 = t1 / dble(nproc) - - if (nrank==0) then - write(*,*) 'processor grid', row, ' by ', col, ' time=', t1 - end if - - if (best_time > t1) then - best_time = t1 - best_p_row = row - best_p_col = col - end if - - end if - - end do ! loop through processer grid - - deallocate(factors) - - if (best_p_row/=-1) then - if (nrank==0) then - write(*,*) 'the best processor grid is probably ', & - best_p_row, ' by ', best_p_col - end if - else - errorcode = 9 - call decomp_2d_abort(errorcode, & - 'The processor-grid auto-tuning code failed. ' // & - 'The number of processes requested is probably too large.') - end if - - return - end subroutine best_2d_grid - -#include "factor.inc" - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Halo cell support -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -#include "halo.inc" - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Error handling -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine decomp_2d_abort(errorcode, msg) - - implicit none - - integer, intent(IN) :: errorcode - character(len=*), intent(IN) :: msg - - integer :: ierror - - if (nrank==0) then - write(*,*) '2DECOMP&FFT ERROR - errorcode: ', errorcode - write(*,*) 'ERROR MESSAGE: ' // msg - end if - call MPI_ABORT(MPI_COMM_WORLD,errorcode,ierror) - - return - end subroutine decomp_2d_abort - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Utility routines to help allocate 3D arrays -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -#include "alloc.inc" - - -end module decomp_2d - diff --git a/decomp2d/factor.inc b/decomp2d/factor.inc deleted file mode 100644 index 1ea2988..0000000 --- a/decomp2d/factor.inc +++ /dev/null @@ -1,82 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! A few utility routines to find factors of integer numbers - -subroutine findfactor(num, factors, nfact) - -implicit none - -integer, intent(IN) :: num -integer, intent(OUT), dimension(*) :: factors -integer, intent(OUT) :: nfact -integer :: i, m - -! find the factors <= sqrt(num) -m = int(sqrt(real(num))) -nfact = 1 -do i=1,m -if (num/i*i == num) then -factors(nfact) = i -nfact = nfact + 1 -end if -end do -nfact = nfact - 1 - -! derive those > sqrt(num) -if (factors(nfact)**2/=num) then -do i=nfact+1, 2*nfact -factors(i) = num / factors(2*nfact-i+1) -end do -nfact = nfact * 2 -else -do i=nfact+1, 2*nfact-1 -factors(i) = num / factors(2*nfact-i) -end do -nfact = nfact * 2 - 1 -endif - -return - -end subroutine findfactor - - -subroutine primefactors(num, factors, nfact) - -implicit none - -integer, intent(IN) :: num -integer, intent(OUT), dimension(*) :: factors -integer, intent(INOUT) :: nfact - -integer :: i, n - -i = 2 -nfact = 1 -n = num -do -if (mod(n,i) == 0) then -factors(nfact) = i -nfact = nfact + 1 -n = n / i -else -i = i + 1 -end if -if (n == 1) then -nfact = nfact - 1 -exit -end if -end do - -return - -end subroutine primefactors - diff --git a/decomp2d/fft_common.inc b/decomp2d/fft_common.inc deleted file mode 100644 index 2e7b45c..0000000 --- a/decomp2d/fft_common.inc +++ /dev/null @@ -1,187 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This file contains common code shared by all FFT engines - -integer, parameter, public :: DECOMP_2D_FFT_FORWARD = -1 -integer, parameter, public :: DECOMP_2D_FFT_BACKWARD = 1 - -! Physical space data can be stored in either X-pencil or Z-pencil -integer, parameter, public :: PHYSICAL_IN_X = 1 -integer, parameter, public :: PHYSICAL_IN_Z = 3 - -integer, save :: format ! input X-pencil or Z-pencil - -! The libary can only be initialised once -logical, save :: initialised = .false. - -! Global size of the FFT -integer, save :: nx_fft, ny_fft, nz_fft - -! 2D processor grid -integer, save, dimension(2) :: dims - -! Decomposition objects -TYPE(DECOMP_INFO), save :: ph ! physical space -TYPE(DECOMP_INFO), save :: sp ! spectral space - -! Workspace to store the intermediate Y-pencil data -! *** TODO: investigate how to use only one workspace array -complex(mytype), allocatable, dimension(:,:,:) :: wk2_c2c, wk2_r2c -complex(mytype), allocatable, dimension(:,:,:) :: wk13 - -public :: decomp_2d_fft_init, decomp_2d_fft_3d, & -decomp_2d_fft_finalize, decomp_2d_fft_get_size - -! Declare generic interfaces to handle different inputs - -interface decomp_2d_fft_init -module procedure fft_init_noarg -module procedure fft_init_arg -module procedure fft_init_general -end interface - -interface decomp_2d_fft_3d -module procedure fft_3d_c2c -module procedure fft_3d_r2c -module procedure fft_3d_c2r -end interface - - -contains - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! Initialise the FFT module -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine fft_init_noarg - -implicit none - -call fft_init_arg(PHYSICAL_IN_X) ! default input is X-pencil data - -return -end subroutine fft_init_noarg - -subroutine fft_init_arg(pencil) ! allow to handle Z-pencil input - -implicit none - -integer, intent(IN) :: pencil - -call fft_init_general(pencil, nx_global, ny_global, nz_global) - -return -end subroutine fft_init_arg - -! Initialise the FFT library to perform arbitrary size transforms -subroutine fft_init_general(pencil, nx, ny, nz) - -implicit none - -integer, intent(IN) :: pencil -integer, intent(IN) :: nx, ny, nz - -logical, dimension(2) :: dummy_periods -integer, dimension(2) :: dummy_coords -integer :: status, errorcode, ierror - -if (initialised) then -errorcode = 4 -call decomp_2d_abort(errorcode, & -'FFT library should only be initialised once') -end if - -format = pencil -nx_fft = nx -ny_fft = ny -nz_fft = nz - -! determine the processor grid in use -call MPI_CART_GET(DECOMP_2D_COMM_CART_X, 2, & -dims, dummy_periods, dummy_coords, ierror) - -! for c2r/r2c interface: -! if in physical space, a real array is of size: nx*ny*nz -! in spectral space, the complex array is of size: -! (nx/2+1)*ny*nz, if PHYSICAL_IN_X -! or nx*ny*(nz/2+1), if PHYSICAL_IN_Z - -call decomp_info_init(nx, ny, nz, ph) -if (format==PHYSICAL_IN_X) then -call decomp_info_init(nx/2+1, ny, nz, sp) -else if (format==PHYSICAL_IN_Z) then -call decomp_info_init(nx, ny, nz/2+1, sp) -end if - -allocate(wk2_c2c(ph%ysz(1),ph%ysz(2),ph%ysz(3)), STAT=status) -allocate(wk2_r2c(sp%ysz(1),sp%ysz(2),sp%ysz(3)), STAT=status) -if (format==PHYSICAL_IN_X) then -allocate(wk13(sp%xsz(1),sp%xsz(2),sp%xsz(3)), STAT=status) -else if (format==PHYSICAL_IN_Z) then -allocate(wk13(sp%zsz(1),sp%zsz(2),sp%zsz(3)), STAT=status) -end if -if (status /= 0) then -errorcode = 3 -call decomp_2d_abort(errorcode, & -'Out of memory when initialising FFT') -end if - -call init_fft_engine - -initialised = .true. - -return -end subroutine fft_init_general - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! Final clean up -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine decomp_2d_fft_finalize - -implicit none - -call decomp_info_finalize(ph) -call decomp_info_finalize(sp) - -deallocate(wk2_c2c, wk2_r2c, wk13) - -call finalize_fft_engine - -initialised = .false. - -return -end subroutine decomp_2d_fft_finalize - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! Return the size, starting/ending index of the distributed array -! whose global size is (nx/2+1)*ny*nz, for defining data structures -! in r2c and c2r interfaces -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine decomp_2d_fft_get_size(istart, iend, isize) - -implicit none -integer, dimension(3), intent(OUT) :: istart, iend, isize - -if (format==PHYSICAL_IN_X) then -istart = sp%zst -iend = sp%zen -isize = sp%zsz -else if (format==PHYSICAL_IN_Z) then -istart = sp%xst -iend = sp%xen -isize = sp%xsz -end if - -return -end subroutine decomp_2d_fft_get_size diff --git a/decomp2d/fft_common_3d.inc b/decomp2d/fft_common_3d.inc deleted file mode 100644 index 8c9696e..0000000 --- a/decomp2d/fft_common_3d.inc +++ /dev/null @@ -1,253 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This file contains 3D c2c/r2c/c2r transform subroutines which are -! identical for several FFT engines - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! 3D FFT - complex to complex -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine fft_3d_c2c(in, out, isign) - -implicit none - -complex(mytype), dimension(:,:,:), intent(INOUT) :: in -complex(mytype), dimension(:,:,:), intent(OUT) :: out -integer, intent(IN) :: isign - -#ifndef OVERWRITE -complex(mytype), allocatable, dimension(:,:,:) :: wk1 -#endif - -if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_FORWARD .OR. & -format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_BACKWARD) then - -! ===== 1D FFTs in X ===== -#ifdef OVERWRITE -call c2c_1m_x(in,isign,ph) -#else -allocate (wk1(ph%xsz(1),ph%xsz(2),ph%xsz(3))) -wk1 = in -call c2c_1m_x(wk1,isign,ph) -#endif - -! ===== Swap X --> Y; 1D FFTs in Y ===== - -if (dims(1)>1) then -#ifdef OVERWRITE -call transpose_x_to_y(in,wk2_c2c,ph) -#else -call transpose_x_to_y(wk1,wk2_c2c,ph) -#endif -call c2c_1m_y(wk2_c2c,isign,ph) -else -#ifdef OVERWRITE -call c2c_1m_y(in,isign,ph) -#else -call c2c_1m_y(wk1,isign,ph) -#endif -end if - -! ===== Swap Y --> Z; 1D FFTs in Z ===== -if (dims(1)>1) then -call transpose_y_to_z(wk2_c2c,out,ph) -else -#ifdef OVERWRITE -call transpose_y_to_z(in,out,ph) -#else -call transpose_y_to_z(wk1,out,ph) -#endif -end if -call c2c_1m_z(out,isign,ph) - -else if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_BACKWARD & -.OR. & -format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_FORWARD) then - -! ===== 1D FFTs in Z ===== -#ifdef OVERWRITE -call c2c_1m_z(in,isign,ph) -#else -allocate (wk1(ph%zsz(1),ph%zsz(2),ph%zsz(3))) -wk1 = in -call c2c_1m_z(wk1,isign,ph) -#endif - -! ===== Swap Z --> Y; 1D FFTs in Y ===== -if (dims(1)>1) then -#ifdef OVERWRITE -call transpose_z_to_y(in,wk2_c2c,ph) -#else -call transpose_z_to_y(wk1,wk2_c2c,ph) -#endif -call c2c_1m_y(wk2_c2c,isign,ph) -else ! out==wk2_c2c if 1D decomposition -#ifdef OVERWRITE -call transpose_z_to_y(in,out,ph) -#else -call transpose_z_to_y(wk1,out,ph) -#endif -call c2c_1m_y(out,isign,ph) -end if - -! ===== Swap Y --> X; 1D FFTs in X ===== -if (dims(1)>1) then -call transpose_y_to_x(wk2_c2c,out,ph) -end if -call c2c_1m_x(out,isign,ph) - -end if - -return -end subroutine fft_3d_c2c - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! 3D forward FFT - real to complex -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine fft_3d_r2c(in_r, out_c) - -implicit none - -real(mytype), dimension(:,:,:), intent(IN) :: in_r -complex(mytype), dimension(:,:,:), intent(OUT) :: out_c - -if (format==PHYSICAL_IN_X) then - -! ===== 1D FFTs in X ===== -call r2c_1m_x(in_r,wk13) - -! ===== Swap X --> Y; 1D FFTs in Y ===== -if (dims(1)>1) then -call transpose_x_to_y(wk13,wk2_r2c,sp) -call c2c_1m_y(wk2_r2c,-1,sp) -else -call c2c_1m_y(wk13,-1,sp) -end if - -! ===== Swap Y --> Z; 1D FFTs in Z ===== -if (dims(1)>1) then -call transpose_y_to_z(wk2_r2c,out_c,sp) -else -call transpose_y_to_z(wk13,out_c,sp) -end if -call c2c_1m_z(out_c,-1,sp) - -else if (format==PHYSICAL_IN_Z) then - -! ===== 1D FFTs in Z ===== -call r2c_1m_z(in_r,wk13) - -! ===== Swap Z --> Y; 1D FFTs in Y ===== -if (dims(1)>1) then -call transpose_z_to_y(wk13,wk2_r2c,sp) -call c2c_1m_y(wk2_r2c,-1,sp) -else ! out_c==wk2_r2c if 1D decomposition -call transpose_z_to_y(wk13,out_c,sp) -call c2c_1m_y(out_c,-1,sp) -end if - -! ===== Swap Y --> X; 1D FFTs in X ===== -if (dims(1)>1) then -call transpose_y_to_x(wk2_r2c,out_c,sp) -end if -call c2c_1m_x(out_c,-1,sp) - -end if - -return -end subroutine fft_3d_r2c - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! 3D inverse FFT - complex to real -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine fft_3d_c2r(in_c, out_r) - -implicit none - -complex(mytype), dimension(:,:,:), intent(INOUT) :: in_c -real(mytype), dimension(:,:,:), intent(OUT) :: out_r - -#ifndef OVERWRITE -complex(mytype), allocatable, dimension(:,:,:) :: wk1 -#endif - -if (format==PHYSICAL_IN_X) then - -! ===== 1D FFTs in Z ===== -#ifdef OVERWRITE -call c2c_1m_z(in_c,1,sp) -#else -allocate(wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3))) -wk1 = in_c -call c2c_1m_z(wk1,1,sp) -#endif - -! ===== Swap Z --> Y; 1D FFTs in Y ===== -#ifdef OVERWRITE -call transpose_z_to_y(in_c,wk2_r2c,sp) -#else -call transpose_z_to_y(wk1,wk2_r2c,sp) -#endif -call c2c_1m_y(wk2_r2c,1,sp) - -! ===== Swap Y --> X; 1D FFTs in X ===== -if (dims(1)>1) then -call transpose_y_to_x(wk2_r2c,wk13,sp) -call c2r_1m_x(wk13,out_r) -else -call c2r_1m_x(wk2_r2c,out_r) -end if - -else if (format==PHYSICAL_IN_Z) then - -! ===== 1D FFTs in X ===== -#ifdef OVERWRITE -call c2c_1m_x(in_c,1,sp) -#else -allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3))) -wk1 = in_c -call c2c_1m_x(wk1,1,sp) -#endif - -! ===== Swap X --> Y; 1D FFTs in Y ===== -if (dims(1)>1) then -#ifdef OVERWRITE -call transpose_x_to_y(in_c,wk2_r2c,sp) -#else -call transpose_x_to_y(wk1,wk2_r2c,sp) -#endif -call c2c_1m_y(wk2_r2c,1,sp) -else ! in_c==wk2_r2c if 1D decomposition -#ifdef OVERWRITE -call c2c_1m_y(in_c,1,sp) -#else -call c2c_1m_y(wk1,1,sp) -#endif -end if - -! ===== Swap Y --> Z; 1D FFTs in Z ===== -if (dims(1)>1) then -call transpose_y_to_z(wk2_r2c,wk13,sp) -else -#ifdef OVERWRITE -call transpose_y_to_z(in_c,wk13,sp) -#else -call transpose_y_to_z(wk1,wk13,sp) -#endif -end if -call c2r_1m_z(wk13,out_r) - -end if - -return -end subroutine fft_3d_c2r diff --git a/decomp2d/fft_fftw3.f90 b/decomp2d/fft_fftw3.f90 deleted file mode 100644 index 09edb0d..0000000 --- a/decomp2d/fft_fftw3.f90 +++ /dev/null @@ -1,724 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This is the FFTW (version 3.x) implementation of the FFT library - -module decomp_2d_fft - - use decomp_2d ! 2D decomposition module - - implicit none - - include "fftw3.f" - - private ! Make everything private unless declared public - - ! engine-specific global variables - integer, save :: plan_type = FFTW_MEASURE - - ! FFTW plans - ! j=1,2,3 corresponds to the 1D FFTs in X,Y,Z direction, respectively - ! For c2c transforms: - ! use plan(-1,j) for forward transform; - ! use plan( 1,j) for backward transform; - ! For r2c/c2r transforms: - ! use plan(0,j) for r2c transforms; - ! use plan(2,j) for c2r transforms; - integer*8, save :: plan(-1:2,3) - - ! common code used for all engines, including global variables, - ! generic interface definitions and several subroutines -#include "fft_common.inc" - - ! Return a FFTW3 plan for multiple 1D c2c FFTs in X direction - subroutine c2c_1m_x_plan(plan1, decomp, isign) - - implicit none - - integer*8, intent(OUT) :: plan1 - TYPE(DECOMP_INFO), intent(IN) :: decomp - integer, intent(IN) :: isign - - complex(mytype), allocatable, dimension(:,:,:) :: a1 - - allocate(a1(decomp%xsz(1),decomp%xsz(2),decomp%xsz(3))) - -#ifdef DOUBLE_PREC - call dfftw_plan_many_dft(plan1, 1, decomp%xsz(1), & - decomp%xsz(2)*decomp%xsz(3), a1, decomp%xsz(1), 1, & - decomp%xsz(1), a1, decomp%xsz(1), 1, decomp%xsz(1), & - isign, plan_type) -#else - call sfftw_plan_many_dft(plan1, 1, decomp%xsz(1), & - decomp%xsz(2)*decomp%xsz(3), a1, decomp%xsz(1), 1, & - decomp%xsz(1), a1, decomp%xsz(1), 1, decomp%xsz(1), & - isign, plan_type) -#endif - - deallocate(a1) - - return - end subroutine c2c_1m_x_plan - - ! Return a FFTW3 plan for multiple 1D c2c FFTs in Y direction - subroutine c2c_1m_y_plan(plan1, decomp, isign) - - implicit none - - integer*8, intent(OUT) :: plan1 - TYPE(DECOMP_INFO), intent(IN) :: decomp - integer, intent(IN) :: isign - - complex(mytype), allocatable, dimension(:,:) :: a1 - - ! Due to memory pattern of 3D arrays, 1D FFTs along Y have to be - ! done one Z-plane at a time. So plan for 2D data sets here. - - allocate(a1(decomp%ysz(1),decomp%ysz(2))) - -#ifdef DOUBLE_PREC - call dfftw_plan_many_dft(plan1, 1, decomp%ysz(2), decomp%ysz(1), & - a1, decomp%ysz(2), decomp%ysz(1), 1, a1, decomp%ysz(2), & - decomp%ysz(1), 1, isign, plan_type) -#else - call sfftw_plan_many_dft(plan1, 1, decomp%ysz(2), decomp%ysz(1), & - a1, decomp%ysz(2), decomp%ysz(1), 1, a1, decomp%ysz(2), & - decomp%ysz(1), 1, isign, plan_type) -#endif - - deallocate(a1) - - return - end subroutine c2c_1m_y_plan - - - ! Return a FFTW3 plan for multiple 1D c2c FFTs in Z direction - subroutine c2c_1m_z_plan(plan1, decomp, isign) - - implicit none - - integer*8, intent(OUT) :: plan1 - TYPE(DECOMP_INFO), intent(IN) :: decomp - integer, intent(IN) :: isign - - complex(mytype), allocatable, dimension(:,:,:) :: a1 - - allocate(a1(decomp%zsz(1),decomp%zsz(2),decomp%zsz(3))) - -#ifdef DOUBLE_PREC - call dfftw_plan_many_dft(plan1, 1, decomp%zsz(3), & - decomp%zsz(1)*decomp%zsz(2), a1, decomp%zsz(3), & - decomp%zsz(1)*decomp%zsz(2), 1, a1, decomp%zsz(3), & - decomp%zsz(1)*decomp%zsz(2), 1, isign, plan_type) -#else - call sfftw_plan_many_dft(plan1, 1, decomp%zsz(3), & - decomp%zsz(1)*decomp%zsz(2), a1, decomp%zsz(3), & - decomp%zsz(1)*decomp%zsz(2), 1, a1, decomp%zsz(3), & - decomp%zsz(1)*decomp%zsz(2), 1, isign, plan_type) -#endif - - deallocate(a1) - - return - end subroutine c2c_1m_z_plan - - - ! Return a FFTW3 plan for multiple 1D r2c FFTs in X direction - subroutine r2c_1m_x_plan(plan1, decomp_ph, decomp_sp) - - implicit none - - integer*8, intent(OUT) :: plan1 - TYPE(DECOMP_INFO), intent(IN) :: decomp_ph - TYPE(DECOMP_INFO), intent(IN) :: decomp_sp - - real(mytype), allocatable, dimension(:,:,:) :: a1 - complex(mytype), allocatable, dimension(:,:,:) :: a2 - - allocate(a1(decomp_ph%xsz(1),decomp_ph%xsz(2),decomp_ph%xsz(3))) - allocate(a2(decomp_sp%xsz(1),decomp_sp%xsz(2),decomp_sp%xsz(3))) -#ifdef DOUBLE_PREC - call dfftw_plan_many_dft_r2c(plan1, 1, decomp_ph%xsz(1), & - decomp_ph%xsz(2)*decomp_ph%xsz(3), a1, decomp_ph%xsz(1), 1, & - decomp_ph%xsz(1), a2, decomp_sp%xsz(1), 1, decomp_sp%xsz(1), & - plan_type) -#else - call sfftw_plan_many_dft_r2c(plan1, 1, decomp_ph%xsz(1), & - decomp_ph%xsz(2)*decomp_ph%xsz(3), a1, decomp_ph%xsz(1), 1, & - decomp_ph%xsz(1), a2, decomp_sp%xsz(1), 1, decomp_sp%xsz(1), & - plan_type) -#endif - deallocate(a1,a2) - - return - end subroutine r2c_1m_x_plan - - - ! Return a FFTW3 plan for multiple 1D c2r FFTs in X direction - subroutine c2r_1m_x_plan(plan1, decomp_sp, decomp_ph) - - implicit none - - integer*8, intent(OUT) :: plan1 - TYPE(DECOMP_INFO), intent(IN) :: decomp_sp - TYPE(DECOMP_INFO), intent(IN) :: decomp_ph - - complex(mytype), allocatable, dimension(:,:,:) :: a1 - real(mytype), allocatable, dimension(:,:,:) :: a2 - - allocate(a1(decomp_sp%xsz(1),decomp_sp%xsz(2),decomp_sp%xsz(3))) - allocate(a2(decomp_ph%xsz(1),decomp_ph%xsz(2),decomp_ph%xsz(3))) -#ifdef DOUBLE_PREC - call dfftw_plan_many_dft_c2r(plan1, 1, decomp_ph%xsz(1), & - decomp_ph%xsz(2)*decomp_ph%xsz(3), a1, decomp_sp%xsz(1), 1, & - decomp_sp%xsz(1), a2, decomp_ph%xsz(1), 1, decomp_ph%xsz(1), & - plan_type) -#else - call sfftw_plan_many_dft_c2r(plan1, 1, decomp_ph%xsz(1), & - decomp_ph%xsz(2)*decomp_ph%xsz(3), a1, decomp_sp%xsz(1), 1, & - decomp_sp%xsz(1), a2, decomp_ph%xsz(1), 1, decomp_ph%xsz(1), & - plan_type) -#endif - deallocate(a1,a2) - - return - end subroutine c2r_1m_x_plan - - - ! Return a FFTW3 plan for multiple 1D r2c FFTs in Z direction - subroutine r2c_1m_z_plan(plan1, decomp_ph, decomp_sp) - - implicit none - - integer*8, intent(OUT) :: plan1 - TYPE(DECOMP_INFO), intent(IN) :: decomp_ph - TYPE(DECOMP_INFO), intent(IN) :: decomp_sp - - real(mytype), allocatable, dimension(:,:,:) :: a1 - complex(mytype), allocatable, dimension(:,:,:) :: a2 - - allocate(a1(decomp_ph%zsz(1),decomp_ph%zsz(2),decomp_ph%zsz(3))) - allocate(a2(decomp_sp%zsz(1),decomp_sp%zsz(2),decomp_sp%zsz(3))) -#ifdef DOUBLE_PREC - call dfftw_plan_many_dft_r2c(plan1, 1, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), a1, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), 1, a2, decomp_sp%zsz(3), & - decomp_sp%zsz(1)*decomp_sp%zsz(2), 1, plan_type) -#else - call sfftw_plan_many_dft_r2c(plan1, 1, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), a1, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), 1, a2, decomp_sp%zsz(3), & - decomp_sp%zsz(1)*decomp_sp%zsz(2), 1, plan_type) -#endif - deallocate(a1,a2) - - return - end subroutine r2c_1m_z_plan - - - ! Return a FFTW3 plan for multiple 1D c2r FFTs in Z direction - subroutine c2r_1m_z_plan(plan1, decomp_sp, decomp_ph) - - implicit none - - integer*8, intent(OUT) :: plan1 - TYPE(DECOMP_INFO), intent(IN) :: decomp_sp - TYPE(DECOMP_INFO), intent(IN) :: decomp_ph - - complex(mytype), allocatable, dimension(:,:,:) :: a1 - real(mytype), allocatable, dimension(:,:,:) :: a2 - - allocate(a1(decomp_sp%zsz(1),decomp_sp%zsz(2),decomp_sp%zsz(3))) - allocate(a2(decomp_ph%zsz(1),decomp_ph%zsz(2),decomp_ph%zsz(3))) - -#ifdef DOUBLE_PREC - call dfftw_plan_many_dft_c2r(plan1, 1, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), a1, decomp_sp%zsz(3), & - decomp_sp%zsz(1)*decomp_sp%zsz(2), 1, a2, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), 1, plan_type) -#else - call sfftw_plan_many_dft_c2r(plan1, 1, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), a1, decomp_sp%zsz(3), & - decomp_sp%zsz(1)*decomp_sp%zsz(2), 1, a2, decomp_ph%zsz(3), & - decomp_ph%zsz(1)*decomp_ph%zsz(2), 1, plan_type) -#endif - deallocate(a1,a2) - - return - end subroutine c2r_1m_z_plan - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! This routine performs one-time initialisations for the FFT engine -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine init_fft_engine - - implicit none - - if (nrank==0) then - write(*,*) ' ' - write(*,*) '***** Using the FFTW (version 3.x) engine *****' - write(*,*) ' ' - end if - - if (format == PHYSICAL_IN_X) then - - ! For C2C transforms - call c2c_1m_x_plan(plan(-1,1), ph, FFTW_FORWARD ) - call c2c_1m_y_plan(plan(-1,2), ph, FFTW_FORWARD ) - call c2c_1m_z_plan(plan(-1,3), ph, FFTW_FORWARD ) - call c2c_1m_z_plan(plan( 1,3), ph, FFTW_BACKWARD) - call c2c_1m_y_plan(plan( 1,2), ph, FFTW_BACKWARD) - call c2c_1m_x_plan(plan( 1,1), ph, FFTW_BACKWARD) - - ! For R2C/C2R tranforms - call r2c_1m_x_plan(plan(0,1), ph, sp) - call c2c_1m_y_plan(plan(0,2), sp, FFTW_FORWARD ) - call c2c_1m_z_plan(plan(0,3), sp, FFTW_FORWARD ) - call c2c_1m_z_plan(plan(2,3), sp, FFTW_BACKWARD) - call c2c_1m_y_plan(plan(2,2), sp, FFTW_BACKWARD) - call c2r_1m_x_plan(plan(2,1), sp, ph) - - else if (format == PHYSICAL_IN_Z) then - - ! For C2C transforms - call c2c_1m_z_plan(plan(-1,3), ph, FFTW_FORWARD ) - call c2c_1m_y_plan(plan(-1,2), ph, FFTW_FORWARD ) - call c2c_1m_x_plan(plan(-1,1), ph, FFTW_FORWARD ) - call c2c_1m_x_plan(plan( 1,1), ph, FFTW_BACKWARD) - call c2c_1m_y_plan(plan( 1,2), ph, FFTW_BACKWARD) - call c2c_1m_z_plan(plan( 1,3), ph, FFTW_BACKWARD) - - ! For R2C/C2R tranforms - call r2c_1m_z_plan(plan(0,3), ph, sp) - call c2c_1m_y_plan(plan(0,2), sp, FFTW_FORWARD ) - call c2c_1m_x_plan(plan(0,1), sp, FFTW_FORWARD ) - call c2c_1m_x_plan(plan(2,1), sp, FFTW_BACKWARD) - call c2c_1m_y_plan(plan(2,2), sp, FFTW_BACKWARD) - call c2r_1m_z_plan(plan(2,3), sp, ph) - - end if - - return - end subroutine init_fft_engine - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! This routine performs one-time finalisations for the FFT engine -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine finalize_fft_engine - - implicit none - - integer :: i,j - - do j=1,3 - do i=-1,2 -#ifdef DOUBLE_PREC - call dfftw_destroy_plan(plan(i,j)) -#else - call sfftw_destroy_plan(plan(i,j)) -#endif - end do - end do - - return - end subroutine finalize_fft_engine - - - ! Following routines calculate multiple one-dimensional FFTs to form - ! the basis of three-dimensional FFTs. - - ! c2c transform, multiple 1D FFTs in x direction - subroutine c2c_1m_x(inout, isign, plan1) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: inout - integer, intent(IN) :: isign - integer*8, intent(IN) :: plan1 - -#ifdef DOUBLE_PREC - call dfftw_execute_dft(plan1, inout, inout) -#else - call sfftw_execute_dft(plan1, inout, inout) -#endif - - return - end subroutine c2c_1m_x - - - ! c2c transform, multiple 1D FFTs in y direction - subroutine c2c_1m_y(inout, isign, plan1) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: inout - integer, intent(IN) :: isign - integer*8, intent(IN) :: plan1 - - integer :: k, s3 - - ! transform on one Z-plane at a time - s3 = size(inout,3) - do k=1,s3 -#ifdef DOUBLE_PREC - call dfftw_execute_dft(plan1, inout(:,:,k), inout(:,:,k)) -#else - call sfftw_execute_dft(plan1, inout(:,:,k), inout(:,:,k)) -#endif - end do - - return - end subroutine c2c_1m_y - - ! c2c transform, multiple 1D FFTs in z direction - subroutine c2c_1m_z(inout, isign, plan1) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: inout - integer, intent(IN) :: isign - integer*8, intent(IN) :: plan1 - -#ifdef DOUBLE_PREC - call dfftw_execute_dft(plan1, inout, inout) -#else - call sfftw_execute_dft(plan1, inout, inout) -#endif - - return - end subroutine c2c_1m_z - - ! r2c transform, multiple 1D FFTs in x direction - subroutine r2c_1m_x(input, output) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: input - complex(mytype), dimension(:,:,:), intent(OUT) :: output - -#ifdef DOUBLE_PREC - call dfftw_execute_dft_r2c(plan(0,1), input, output) -#else - call sfftw_execute_dft_r2c(plan(0,1), input, output) -#endif - - return - - end subroutine r2c_1m_x - - ! r2c transform, multiple 1D FFTs in z direction - subroutine r2c_1m_z(input, output) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: input - complex(mytype), dimension(:,:,:), intent(OUT) :: output - -#ifdef DOUBLE_PREC - call dfftw_execute_dft_r2c(plan(0,3), input, output) -#else - call sfftw_execute_dft_r2c(plan(0,3), input, output) -#endif - - return - - end subroutine r2c_1m_z - - ! c2r transform, multiple 1D FFTs in x direction - subroutine c2r_1m_x(input, output) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: input - real(mytype), dimension(:,:,:), intent(OUT) :: output - -#ifdef DOUBLE_PREC - call dfftw_execute_dft_c2r(plan(2,1), input, output) -#else - call sfftw_execute_dft_c2r(plan(2,1), input, output) -#endif - - return - - end subroutine c2r_1m_x - - ! c2r transform, multiple 1D FFTs in z direction - subroutine c2r_1m_z(input, output) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: input - real(mytype), dimension(:,:,:), intent(OUT) :: output - -#ifdef DOUBLE_PREC - call dfftw_execute_dft_c2r(plan(2,3), input, output) -#else - call sfftw_execute_dft_c2r(plan(2,3), input, output) -#endif - - return - - end subroutine c2r_1m_z - - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! 3D FFT - complex to complex -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine fft_3d_c2c(in, out, isign) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: in - complex(mytype), dimension(:,:,:), intent(OUT) :: out - integer, intent(IN) :: isign - -#ifndef OVERWRITE - complex(mytype), allocatable, dimension(:,:,:) :: wk1 -#endif - - if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_FORWARD .OR. & - format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_BACKWARD) then - - ! ===== 1D FFTs in X ===== -#ifdef OVERWRITE - call c2c_1m_x(in,isign,plan(isign,1)) -#else - allocate (wk1(ph%xsz(1),ph%xsz(2),ph%xsz(3))) - wk1 = in - call c2c_1m_x(wk1,isign,plan(isign,1)) -#endif - - ! ===== Swap X --> Y; 1D FFTs in Y ===== - - if (dims(1)>1) then -#ifdef OVERWRITE - call transpose_x_to_y(in,wk2_c2c,ph) -#else - call transpose_x_to_y(wk1,wk2_c2c,ph) -#endif - call c2c_1m_y(wk2_c2c,isign,plan(isign,2)) - else -#ifdef OVERWRITE - call c2c_1m_y(in,isign,plan(isign,2)) -#else - call c2c_1m_y(wk1,isign,plan(isign,2)) -#endif - end if - - ! ===== Swap Y --> Z; 1D FFTs in Z ===== - if (dims(1)>1) then - call transpose_y_to_z(wk2_c2c,out,ph) - else -#ifdef OVERWRITE - call transpose_y_to_z(in,out,ph) -#else - call transpose_y_to_z(wk1,out,ph) -#endif - end if - call c2c_1m_z(out,isign,plan(isign,3)) - - else if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_BACKWARD & - .OR. & - format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_FORWARD) then - - ! ===== 1D FFTs in Z ===== -#ifdef OVERWRITE - call c2c_1m_z(in,isign,plan(isign,3)) -#else - allocate (wk1(ph%zsz(1),ph%zsz(2),ph%zsz(3))) - wk1 = in - call c2c_1m_z(wk1,isign,plan(isign,3)) -#endif - - ! ===== Swap Z --> Y; 1D FFTs in Y ===== - if (dims(1)>1) then -#ifdef OVERWRITE - call transpose_z_to_y(in,wk2_c2c,ph) -#else - call transpose_z_to_y(wk1,wk2_c2c,ph) -#endif - call c2c_1m_y(wk2_c2c,isign,plan(isign,2)) - else ! out==wk2_c2c if 1D decomposition -#ifdef OVERWRITE - call transpose_z_to_y(in,out,ph) -#else - call transpose_z_to_y(wk1,out,ph) -#endif - call c2c_1m_y(out,isign,plan(isign,2)) - end if - - ! ===== Swap Y --> X; 1D FFTs in X ===== - if (dims(1)>1) then - call transpose_y_to_x(wk2_c2c,out,ph) - end if - call c2c_1m_x(out,isign,plan(isign,1)) - - end if - -#ifndef OVERWRITE - deallocate (wk1) -#endif - - return - end subroutine fft_3d_c2c - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! 3D forward FFT - real to complex -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine fft_3d_r2c(in_r, out_c) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: in_r - complex(mytype), dimension(:,:,:), intent(OUT) :: out_c - - if (format==PHYSICAL_IN_X) then - - ! ===== 1D FFTs in X ===== - call r2c_1m_x(in_r,wk13) - - ! ===== Swap X --> Y; 1D FFTs in Y ===== - if (dims(1)>1) then - call transpose_x_to_y(wk13,wk2_r2c,sp) - call c2c_1m_y(wk2_r2c,-1,plan(0,2)) - else - call c2c_1m_y(wk13,-1,plan(0,2)) - end if - - ! ===== Swap Y --> Z; 1D FFTs in Z ===== - if (dims(1)>1) then - call transpose_y_to_z(wk2_r2c,out_c,sp) - else - call transpose_y_to_z(wk13,out_c,sp) - end if - call c2c_1m_z(out_c,-1,plan(0,3)) - - else if (format==PHYSICAL_IN_Z) then - - ! ===== 1D FFTs in Z ===== - call r2c_1m_z(in_r,wk13) - - ! ===== Swap Z --> Y; 1D FFTs in Y ===== - if (dims(1)>1) then - call transpose_z_to_y(wk13,wk2_r2c,sp) - call c2c_1m_y(wk2_r2c,-1,plan(0,2)) - else ! out_c==wk2_r2c if 1D decomposition - call transpose_z_to_y(wk13,out_c,sp) - call c2c_1m_y(out_c,-1,plan(0,2)) - end if - - ! ===== Swap Y --> X; 1D FFTs in X ===== - if (dims(1)>1) then - call transpose_y_to_x(wk2_r2c,out_c,sp) - end if - call c2c_1m_x(out_c,-1,plan(0,1)) - - end if - - return - end subroutine fft_3d_r2c - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! 3D inverse FFT - complex to real -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine fft_3d_c2r(in_c, out_r) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: in_c - real(mytype), dimension(:,:,:), intent(OUT) :: out_r - -#ifndef OVERWRITE - complex(mytype), allocatable, dimension(:,:,:) :: wk1 -#endif - - if (format==PHYSICAL_IN_X) then - - ! ===== 1D FFTs in Z ===== -#ifdef OVERWRITE - call c2c_1m_z(in_c,1,plan(2,3)) -#else - allocate (wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3))) - wk1 = in_c - call c2c_1m_z(wk1,1,plan(2,3)) -#endif - - ! ===== Swap Z --> Y; 1D FFTs in Y ===== -#ifdef OVERWRITE - call transpose_z_to_y(in_c,wk2_r2c,sp) -#else - call transpose_z_to_y(wk1,wk2_r2c,sp) -#endif - call c2c_1m_y(wk2_r2c,1,plan(2,2)) - - ! ===== Swap Y --> X; 1D FFTs in X ===== - if (dims(1)>1) then - call transpose_y_to_x(wk2_r2c,wk13,sp) - call c2r_1m_x(wk13,out_r) - else - call c2r_1m_x(wk2_r2c,out_r) - end if - - else if (format==PHYSICAL_IN_Z) then - - ! ===== 1D FFTs in X ===== -#ifdef OVERWRITE - call c2c_1m_x(in_c,1,plan(2,1)) -#else - allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3))) - wk1 = in_c - call c2c_1m_x(wk1,1,plan(2,1)) -#endif - - ! ===== Swap X --> Y; 1D FFTs in Y ===== - if (dims(1)>1) then -#ifdef OVERWRITE - call transpose_x_to_y(in_c,wk2_r2c,sp) -#else - call transpose_x_to_y(wk1,wk2_r2c,sp) -#endif - call c2c_1m_y(wk2_r2c,1,plan(2,2)) - else ! in_c==wk2_r2c if 1D decomposition -#ifdef OVERWRITE - call c2c_1m_y(in_c,1,plan(2,2)) -#else - call c2c_1m_y(wk1,1,plan(2,2)) -#endif - end if - - ! ===== Swap Y --> Z; 1D FFTs in Z ===== - if (dims(1)>1) then - call transpose_y_to_z(wk2_r2c,wk13,sp) - else -#ifdef OVERWRITE - call transpose_y_to_z(in_c,wk13,sp) -#else - call transpose_y_to_z(wk1,wk13,sp) -#endif - end if - call c2r_1m_z(wk13,out_r) - - end if - -#ifndef OVERWRITE - deallocate (wk1) -#endif - - return - end subroutine fft_3d_c2r - - -end module decomp_2d_fft diff --git a/decomp2d/fft_generic.f90 b/decomp2d/fft_generic.f90 deleted file mode 100644 index 4fb5c41..0000000 --- a/decomp2d/fft_generic.f90 +++ /dev/null @@ -1,303 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This is the 'generic' implementation of the FFT library - -module decomp_2d_fft - - use decomp_2d ! 2D decomposition module - use glassman - - implicit none - - private ! Make everything private unless declared public - - ! engine-specific global variables - complex(mytype), allocatable, dimension(:) :: buf, scratch - - ! common code used for all engines, including global variables, - ! generic interface definitions and several subroutines -#include "fft_common.inc" - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! This routine performs one-time initialisations for the FFT engine -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine init_fft_engine - - implicit none - - integer :: cbuf_size - - if (nrank==0) then - write(*,*) ' ' - write(*,*) '***** Using the generic FFT engine *****' - write(*,*) ' ' - end if - - cbuf_size = max(ph%xsz(1), ph%ysz(2)) - cbuf_size = max(cbuf_size, ph%zsz(3)) - allocate(buf(cbuf_size)) - allocate(scratch(cbuf_size)) - - return - end subroutine init_fft_engine - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! This routine performs one-time finalisations for the FFT engine -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine finalize_fft_engine - - implicit none - - deallocate(buf,scratch) - - return - end subroutine finalize_fft_engine - - - ! Following routines calculate multiple one-dimensional FFTs to form - ! the basis of three-dimensional FFTs. - - ! c2c transform, multiple 1D FFTs in x direction - subroutine c2c_1m_x(inout, isign, decomp) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: inout - integer, intent(IN) :: isign - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k - - do k=1,decomp%xsz(3) - do j=1,decomp%xsz(2) - do i=1,decomp%xsz(1) - buf(i) = inout(i,j,k) - end do - call spcfft(buf,decomp%xsz(1),isign,scratch) - do i=1,decomp%xsz(1) - inout(i,j,k) = buf(i) - end do - end do - end do - - return - - end subroutine c2c_1m_x - - ! c2c transform, multiple 1D FFTs in y direction - subroutine c2c_1m_y(inout, isign, decomp) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: inout - integer, intent(IN) :: isign - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k - - do k=1,decomp%ysz(3) - do i=1,decomp%ysz(1) - do j=1,decomp%ysz(2) - buf(j) = inout(i,j,k) - end do - call spcfft(buf,decomp%ysz(2),isign,scratch) - do j=1,decomp%ysz(2) - inout(i,j,k) = buf(j) - end do - end do - end do - - return - - end subroutine c2c_1m_y - - ! c2c transform, multiple 1D FFTs in z direction - subroutine c2c_1m_z(inout, isign, decomp) - - implicit none - - complex(mytype), dimension(:,:,:), intent(INOUT) :: inout - integer, intent(IN) :: isign - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k - - do j=1,decomp%zsz(2) - do i=1,decomp%zsz(1) - do k=1,decomp%zsz(3) - buf(k) = inout(i,j,k) - end do - call spcfft(buf,decomp%zsz(3),isign,scratch) - do k=1,decomp%zsz(3) - inout(i,j,k) = buf(k) - end do - end do - end do - - return - - end subroutine c2c_1m_z - - ! r2c transform, multiple 1D FFTs in x direction - subroutine r2c_1m_x(input, output) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: input - complex(mytype), dimension(:,:,:), intent(OUT) :: output - - integer :: i,j,k, s1,s2,s3, d1 - - s1 = size(input,1) - s2 = size(input,2) - s3 = size(input,3) - d1 = size(output,1) - - do k=1,s3 - do j=1,s2 - ! Glassman's FFT is c2c only, - ! needing some pre- and post-processing for r2c - ! pack real input in complex storage - do i=1,s1 - buf(i) = cmplx(input(i,j,k),0._mytype, kind=mytype) - end do - call spcfft(buf,s1,-1,scratch) - ! note d1 ~ s1/2+1 - ! simply drop the redundant part of the complex output - do i=1,d1 - output(i,j,k) = buf(i) - end do - end do - end do - - return - - end subroutine r2c_1m_x - - ! r2c transform, multiple 1D FFTs in z direction - subroutine r2c_1m_z(input, output) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: input - complex(mytype), dimension(:,:,:), intent(OUT) :: output - - integer :: i,j,k, s1,s2,s3, d3 - - s1 = size(input,1) - s2 = size(input,2) - s3 = size(input,3) - d3 = size(output,3) - - do j=1,s2 - do i=1,s1 - ! Glassman's FFT is c2c only, - ! needing some pre- and post-processing for r2c - ! pack real input in complex storage - do k=1,s3 - buf(k) = cmplx(input(i,j,k),0._mytype, kind=mytype) - end do - call spcfft(buf,s3,-1,scratch) - ! note d3 ~ s3/2+1 - ! simply drop the redundant part of the complex output - do k=1,d3 - output(i,j,k) = buf(k) - end do - end do - end do - - return - - end subroutine r2c_1m_z - - ! c2r transform, multiple 1D FFTs in x direction - subroutine c2r_1m_x(input, output) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: input - real(mytype), dimension(:,:,:), intent(OUT) :: output - - integer :: i,j,k, d1,d2,d3 - - d1 = size(output,1) - d2 = size(output,2) - d3 = size(output,3) - - do k=1,d3 - do j=1,d2 - ! Glassman's FFT is c2c only, - ! needing some pre- and post-processing for c2r - do i=1,d1/2+1 - buf(i) = input(i,j,k) - end do - ! expanding to a full-size complex array - ! For odd N, the storage is: - ! 1, 2, ...... N/2+1 integer division rounded down - ! N, ...... N/2+2 => a(i) is conjugate of a(N+2-i) - ! For even N, the storage is: - ! 1, 2, ...... N/2 , N/2+1 - ! N, ...... N/2+2 again a(i) conjugate of a(N+2-i) - do i=d1/2+2,d1 - buf(i) = conjg(buf(d1+2-i)) - end do - call spcfft(buf,d1,1,scratch) - do i=1,d1 - ! simply drop imaginary part - output(i,j,k) = real(buf(i), kind=mytype) - end do - end do - end do - - return - - end subroutine c2r_1m_x - - ! c2r transform, multiple 1D FFTs in z direction - subroutine c2r_1m_z(input, output) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: input - real(mytype), dimension(:,:,:), intent(OUT) :: output - - integer :: i,j,k, d1,d2,d3 - - d1 = size(output,1) - d2 = size(output,2) - d3 = size(output,3) - - do j=1,d2 - do i=1,d1 - do k=1,d3/2+1 - buf(k) = input(i,j,k) - end do - do k=d3/2+2,d3 - buf(k) = conjg(buf(d3+2-k)) - end do - call spcfft(buf,d3,1,scratch) - do k=1,d3 - output(i,j,k) = real(buf(k), kind=mytype) - end do - end do - end do - - return - - end subroutine c2r_1m_z - - -#include "fft_common_3d.inc" - - -end module decomp_2d_fft diff --git a/decomp2d/fft_mkl.f90 b/decomp2d/fft_mkl.f90 deleted file mode 100644 index 5a95774..0000000 --- a/decomp2d/fft_mkl.f90 +++ /dev/null @@ -1,583 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2012 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This is the Intel MKL implementation of the FFT library - -module decomp_2d_fft - - use decomp_2d ! 2D decomposition module - use MKL_DFTI ! MKL FFT module - - implicit none - - private ! Make everything private unless declared public - - ! engine-specific global variables - - ! Descriptors for MKL FFT, one for each set of 1D FFTs - ! for c2c transforms - type(DFTI_DESCRIPTOR), pointer :: c2c_x, c2c_y, c2c_z - ! for r2c/c2r transforms, PHYSICAL_IN_X - type(DFTI_DESCRIPTOR), pointer :: r2c_x, c2c_y2, c2c_z2, c2r_x - ! for r2c/c2r transforms, PHYSICAL_IN_Z - type(DFTI_DESCRIPTOR), pointer :: r2c_z, c2c_x2, c2r_z - - ! common code used for all engines, including global variables, - ! generic interface definitions and several subroutines -#include "fft_common.inc" - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! This routine performs one-time initialisations for the FFT engine -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine init_fft_engine - - implicit none - - if (nrank==0) then - write(*,*) ' ' - write(*,*) '***** Using the MKL engine *****' - write(*,*) ' ' - end if - - ! For C2C transforms - call c2c_1m_x_plan(c2c_x, ph) - call c2c_1m_y_plan(c2c_y, ph) - call c2c_1m_z_plan(c2c_z, ph) - - ! For R2C/C2R tranfroms with physical space in X-pencil - if (format == PHYSICAL_IN_X) then - call r2c_1m_x_plan(r2c_x, ph, sp, -1) - call c2c_1m_y_plan(c2c_y2, sp) - call c2c_1m_z_plan(c2c_z2, sp) - call r2c_1m_x_plan(c2r_x, ph, sp, 1) - - ! For R2C/C2R tranfroms with physical space in Z-pencil - else if (format == PHYSICAL_IN_Z) then - call r2c_1m_z_plan(r2c_z, ph, sp, -1) - call c2c_1m_y_plan(c2c_y2, sp) - call c2c_1m_x_plan(c2c_x2, sp) - call r2c_1m_z_plan(c2r_z, ph, sp, 1) - end if - - return - end subroutine init_fft_engine - - - ! Return an MKL plan for multiple 1D c2c FFTs in X direction - subroutine c2c_1m_x_plan(desc, decomp) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: status - -#ifdef DOUBLE_PREC - status = DftiCreateDescriptor(desc, DFTI_DOUBLE, & - DFTI_COMPLEX, 1, decomp%xsz(1)) -#else - status = DftiCreateDescriptor(desc, DFTI_SINGLE, & - DFTI_COMPLEX, 1, decomp%xsz(1)) -#endif - status = DftiSetValue(desc, DFTI_NUMBER_OF_TRANSFORMS, & - decomp%xsz(2)*decomp%xsz(3)) - status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE) - status = DftiSetValue(desc, DFTI_INPUT_DISTANCE, decomp%xsz(1)) - status = DftiSetValue(desc, DFTI_OUTPUT_DISTANCE, decomp%xsz(1)) - status = DftiCommitDescriptor(desc) - - return - end subroutine c2c_1m_x_plan - - ! Return an MKL plan for multiple 1D c2c FFTs in Y direction - subroutine c2c_1m_y_plan(desc, decomp) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: status, strides(2) - - ! Due to memory pattern of 3D arrays, 1D FFTs along Y have to be - ! done one Z-plane at a time. So plan for 2D data sets here. - -#ifdef DOUBLE_PREC - status = DftiCreateDescriptor(desc, DFTI_DOUBLE, & - DFTI_COMPLEX, 1, decomp%ysz(2)) -#else - status = DftiCreateDescriptor(desc, DFTI_SINGLE, & - DFTI_COMPLEX, 1, decomp%ysz(2)) -#endif - status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE) - status = DftiSetValue(desc, DFTI_NUMBER_OF_TRANSFORMS, decomp%ysz(1)) - status = DftiSetValue(desc, DFTI_INPUT_DISTANCE, 1) - status = DftiSetValue(desc, DFTI_OUTPUT_DISTANCE, 1) - strides(1) = 0 - strides(2) = decomp%ysz(1) - status = DftiSetValue(desc, DFTI_INPUT_STRIDES, strides) - status = DftiSetValue(desc, DFTI_OUTPUT_STRIDES, strides) - status = DftiCommitDescriptor(desc) - - return - end subroutine c2c_1m_y_plan - - ! Return an MKL plan for multiple 1D c2c FFTs in Z direction - subroutine c2c_1m_z_plan(desc, decomp) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: status, strides(2) - -#ifdef DOUBLE_PREC - status = DftiCreateDescriptor(desc, DFTI_DOUBLE, & - DFTI_COMPLEX, 1, decomp%zsz(3)) -#else - status = DftiCreateDescriptor(desc, DFTI_SINGLE, & - DFTI_COMPLEX, 1, decomp%zsz(3)) -#endif - status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE) - status = DftiSetValue(desc, DFTI_NUMBER_OF_TRANSFORMS, & - decomp%zsz(1)*decomp%zsz(2)) - status = DftiSetValue(desc, DFTI_INPUT_DISTANCE, 1) - status = DftiSetValue(desc, DFTI_OUTPUT_DISTANCE, 1) - strides(1) = 0 - strides(2) = decomp%zsz(1)*decomp%zsz(2) - status = DftiSetValue(desc, DFTI_INPUT_STRIDES, strides) - status = DftiSetValue(desc, DFTI_OUTPUT_STRIDES, strides) - status = DftiCommitDescriptor(desc) - - return - end subroutine c2c_1m_z_plan - - ! Return an MKL plan for multiple 1D r2c FFTs in X direction - subroutine r2c_1m_x_plan(desc, decomp_ph, decomp_sp, direction) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - TYPE(DECOMP_INFO), intent(IN) :: decomp_ph, decomp_sp - integer, intent(IN) :: direction ! (-1=r2c; 1=c2r) - - integer :: status - - ! c2r and r2c plans are almost the same, just swap input/output - -#ifdef DOUBLE_PREC - status = DftiCreateDescriptor(desc, DFTI_DOUBLE, & - DFTI_REAL, 1, decomp_ph%xsz(1)) -#else - status = DftiCreateDescriptor(desc, DFTI_SINGLE, & - DFTI_REAL, 1, decomp_ph%xsz(1)) -#endif - status = DftiSetValue(desc, DFTI_NUMBER_OF_TRANSFORMS, & - decomp_ph%xsz(2)*decomp_ph%xsz(3)) - status = DftiSetValue(desc, DFTI_CONJUGATE_EVEN_STORAGE, & - DFTI_COMPLEX_COMPLEX) - status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE) - if (direction == -1) then ! r2c - status = DftiSetValue(desc, DFTI_INPUT_DISTANCE, & - decomp_ph%xsz(1)) - status = DftiSetValue(desc, DFTI_OUTPUT_DISTANCE, & - decomp_sp%xsz(1)) - else if (direction == 1) then ! c2r - status = DftiSetValue(desc, DFTI_INPUT_DISTANCE, & - decomp_sp%xsz(1)) - status = DftiSetValue(desc, DFTI_OUTPUT_DISTANCE, & - decomp_ph%xsz(1)) - end if - status = DftiCommitDescriptor(desc) - - return - end subroutine r2c_1m_x_plan - - ! Return an MKL plan for multiple 1D r2c FFTs in Z direction - subroutine r2c_1m_z_plan(desc, decomp_ph, decomp_sp, direction) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - TYPE(DECOMP_INFO), intent(IN) :: decomp_ph, decomp_sp - integer, intent(IN) :: direction ! (-1=r2c; 1=c2r) - - integer :: status, strides(2) - - ! c2r and r2c plans are almost the same, just swap input/output - -#ifdef DOUBLE_PREC - status = DftiCreateDescriptor(desc, DFTI_DOUBLE, & - DFTI_REAL, 1, decomp_ph%zsz(3)) -#else - status = DftiCreateDescriptor(desc, DFTI_SINGLE, & - DFTI_REAL, 1, decomp_ph%zsz(3)) -#endif - status = DftiSetValue(desc, DFTI_NUMBER_OF_TRANSFORMS, & - decomp_ph%zsz(1)*decomp_ph%zsz(2)) - status = DftiSetValue(desc, DFTI_CONJUGATE_EVEN_STORAGE, & - DFTI_COMPLEX_COMPLEX) - status = DftiSetValue(desc, DFTI_PLACEMENT, DFTI_NOT_INPLACE) - status = DftiSetValue(desc, DFTI_INPUT_DISTANCE, 1) - status = DftiSetValue(desc, DFTI_OUTPUT_DISTANCE, 1) - strides(1) = 0 - strides(2) = decomp_ph%zsz(1)*decomp_ph%zsz(2) - if (direction == -1) then - status = DftiSetValue(desc, DFTI_INPUT_STRIDES, strides) - else if (direction == 1) then - status = DftiSetValue(desc, DFTI_OUTPUT_STRIDES, strides) - end if - strides(2) = decomp_sp%zsz(1)*decomp_sp%zsz(2) - if (direction == -1) then - status = DftiSetValue(desc, DFTI_OUTPUT_STRIDES, strides) - else if (direction == 1) then - status = DftiSetValue(desc, DFTI_INPUT_STRIDES, strides) - end if - status = DftiCommitDescriptor(desc) - - return - end subroutine r2c_1m_z_plan - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! This routine performs one-time finalisations for the FFT engine -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine finalize_fft_engine - - implicit none - - integer :: status - - status = DftiFreeDescriptor(c2c_x) - status = DftiFreeDescriptor(c2c_y) - status = DftiFreeDescriptor(c2c_z) - if (format==PHYSICAL_IN_X) then - status = DftiFreeDescriptor(r2c_x) - status = DftiFreeDescriptor(c2c_z2) - status = DftiFreeDescriptor(c2r_x) - else if (format==PHYSICAL_IN_Z) then - status = DftiFreeDescriptor(r2c_z) - status = DftiFreeDescriptor(c2c_x2) - status = DftiFreeDescriptor(c2r_z) - end if - status = DftiFreeDescriptor(c2c_y2) - - return - end subroutine finalize_fft_engine - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! 3D FFT - complex to complex -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine fft_3d_c2c(in, out, isign) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: in - complex(mytype), dimension(:,:,:), intent(OUT) :: out - integer, intent(IN) :: isign - - complex(mytype), allocatable, dimension(:,:,:) :: wk1,wk2,wk2b,wk3 - integer :: k, status - - if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_FORWARD .OR. & - format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_BACKWARD) then - - ! ===== 1D FFTs in X ===== - allocate (wk1(ph%xsz(1),ph%xsz(2),ph%xsz(3))) - ! if (isign==DECOMP_2D_FFT_FORWARD) then - ! status = DftiComputeForward(c2c_x, in(:,1,1), wk1(:,1,1)) - ! else if (isign==DECOMP_2D_FFT_BACKWARD) then - ! status = DftiComputeBackward(c2c_x, in(:,1,1), wk1(:,1,1)) - ! end if - status = wrapper_c2c(c2c_x, in, wk1, isign) - - ! ===== Swap X --> Y ===== - allocate (wk2(ph%ysz(1),ph%ysz(2),ph%ysz(3))) - call transpose_x_to_y(wk1,wk2,ph) - - ! ===== 1D FFTs in Y ===== - allocate (wk2b(ph%ysz(1),ph%ysz(2),ph%ysz(3))) - do k=1,ph%xsz(3) ! one Z-plane at a time - ! if (isign==DECOMP_2D_FFT_FORWARD) then - ! status = DftiComputeForward(c2c_y, wk2(:,1,k), wk2b(:,1,k)) - ! else if (isign==DECOMP_2D_FFT_BACKWARD) then - ! status = DftiComputeBackward(c2c_y, wk2(:,1,k), wk2b(:,1,k)) - ! end if - status = wrapper_c2c(c2c_y, wk2(1,1,k), wk2b(1,1,k), isign) - end do - - ! ===== Swap Y --> Z ===== - allocate (wk3(ph%zsz(1),ph%zsz(2),ph%zsz(3))) - call transpose_y_to_z(wk2b,wk3,ph) - - ! ===== 1D FFTs in Z ===== - ! if (isign==DECOMP_2D_FFT_FORWARD) then - ! status = DftiComputeForward(c2c_z, wk3(:,1,1), out(:,1,1)) - ! else if (isign==DECOMP_2D_FFT_BACKWARD) then - ! status = DftiComputeBackward(c2c_z, wk3(:,1,1), out(:,1,1)) - ! end if - status = wrapper_c2c(c2c_z, wk3, out, isign) - - else if (format==PHYSICAL_IN_X .AND. isign==DECOMP_2D_FFT_BACKWARD & - .OR. & - format==PHYSICAL_IN_Z .AND. isign==DECOMP_2D_FFT_FORWARD) then - - ! ===== 1D FFTs in Z ===== - allocate (wk1(ph%zsz(1),ph%zsz(2),ph%zsz(3))) - ! if (isign==DECOMP_2D_FFT_FORWARD) then - ! status = DftiComputeForward(c2c_z, in(:,1,1), wk1(:,1,1)) - ! else if (isign==DECOMP_2D_FFT_BACKWARD) then - ! status = DftiComputeBackward(c2c_z, in(:,1,1), wk1(:,1,1)) - ! end if - status = wrapper_c2c(c2c_z, in, wk1, isign) - - ! ===== Swap Z --> Y ===== - allocate (wk2(ph%ysz(1),ph%ysz(2),ph%ysz(3))) - call transpose_z_to_y(wk1,wk2,ph) - - ! ===== 1D FFTs in Y ===== - allocate (wk2b(ph%ysz(1),ph%ysz(2),ph%ysz(3))) - do k=1,ph%xsz(3) ! one Z-plane at a time - ! if (isign==DECOMP_2D_FFT_FORWARD) then - ! status = DftiComputeForward(c2c_y, wk2(:,1,k), wk2b(:,1,k)) - ! else if (isign==DECOMP_2D_FFT_BACKWARD) then - ! status = DftiComputeBackward(c2c_y, wk2(:,1,k), wk2b(:,1,k)) - ! end if - status = wrapper_c2c(c2c_y, wk2(1,1,k), wk2b(1,1,k), isign) - end do - - ! ===== Swap Y --> X ===== - allocate (wk3(ph%xsz(1),ph%xsz(2),ph%xsz(3))) - call transpose_y_to_x(wk2b,wk3,ph) - - ! ===== 1D FFTs in X ===== - ! if (isign==DECOMP_2D_FFT_FORWARD) then - ! status = DftiComputeForward(c2c_x, wk3(:,1,1), out(:,1,1)) - ! else if (isign==DECOMP_2D_FFT_BACKWARD) then - ! status = DftiComputeBackward(c2c_x, wk3(:,1,1), out(:,1,1)) - ! end if - status = wrapper_c2c(c2c_x, wk3, out, isign) - - end if - - return - end subroutine fft_3d_c2c - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! 3D forward FFT - real to complex -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine fft_3d_r2c(in_r, out_c) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: in_r - complex(mytype), dimension(:,:,:), intent(OUT) :: out_c - - complex(mytype), allocatable, dimension(:,:,:) :: wk1,wk2,wk2b,wk3 - integer :: k, status, isign - - isign = DECOMP_2D_FFT_FORWARD - - if (format==PHYSICAL_IN_X) then - - ! ===== 1D FFTs in X ===== - allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3))) - ! status = DftiComputeForward(r2c_x, in_r(:,1,1), wk1(:,1,1)) - status = wrapper_r2c(r2c_x, in_r, wk1) - - ! ===== Swap X --> Y ===== - allocate (wk2(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - call transpose_x_to_y(wk1,wk2,sp) - - ! ===== 1D FFTs in Y ===== - allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - do k=1,sp%ysz(3) - ! status = DftiComputeForward(c2c_y2, wk2(:,1,k), wk2b(:,1,k)) - status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign) - end do - - ! ===== Swap Y --> Z ===== - allocate (wk3(sp%zsz(1),sp%zsz(2),sp%zsz(3))) - call transpose_y_to_z(wk2b,wk3,sp) - - ! ===== 1D FFTs in Z ===== - ! status = DftiComputeForward(c2c_z2, wk3(:,1,1), out_c(:,1,1)) - status = wrapper_c2c(c2c_z2, wk3, out_c, isign) - - else if (format==PHYSICAL_IN_Z) then - - ! ===== 1D FFTs in Z ===== - allocate(wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3))) - ! status = DftiComputeForward(r2c_z, in_r(:,1,1), wk1(:,1,1)) - status = wrapper_r2c(r2c_z, in_r, wk1) - - ! ===== Swap Z --> Y ===== - allocate (wk2(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - call transpose_z_to_y(wk1,wk2,sp) - - ! ===== 1D FFTs in Y ===== - allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - do k=1,sp%ysz(3) - ! status = DftiComputeForward(c2c_y2, wk2(:,1,k), wk2b(:,1,k)) - status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign) - end do - - ! ===== Swap Y --> X ===== - allocate (wk3(sp%xsz(1),sp%xsz(2),sp%xsz(3))) - call transpose_y_to_x(wk2b,wk3,sp) - - ! ===== 1D FFTs in X ===== - ! status = DftiComputeForward(c2c_x2, wk3(:,1,1), out_c(:,1,1)) - status = wrapper_c2c(c2c_x2, wk3, out_c, isign) - - end if - - return - end subroutine fft_3d_r2c - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! 3D inverse FFT - complex to real -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine fft_3d_c2r(in_c, out_r) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: in_c - real(mytype), dimension(:,:,:), intent(OUT) :: out_r - - complex(mytype), allocatable, dimension(:,:,:) :: wk1,wk2,wk2b,wk3 - integer :: k, status, isign - - isign = DECOMP_2D_FFT_BACKWARD - - if (format==PHYSICAL_IN_X) then - - ! ===== 1D FFTs in Z ===== - allocate (wk1(sp%zsz(1),sp%zsz(2),sp%zsz(3))) - ! status = DftiComputeBackward(c2c_z2, in_c(:,1,1), wk1(:,1,1)) - status = wrapper_c2c(c2c_z2, in_c, wk1, isign) - - ! ===== Swap Z --> Y ===== - allocate (wk2(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - call transpose_z_to_y(wk1,wk2,sp) - - ! ===== 1D FFTs in Y ===== - allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - do k=1,sp%ysz(3) - ! status = DftiComputeBackward(c2c_y2, wk2(:,1,k), wk2b(:,1,k)) - status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign) - end do - - ! ===== Swap Y --> X ===== - allocate (wk3(sp%xsz(1),sp%xsz(2),sp%xsz(3))) - call transpose_y_to_x(wk2b,wk3,sp) - - ! ===== 1D FFTs in X ===== - ! status = DftiComputeBackward(c2r_x, wk3(:,1,1), out_r(:,1,1)) - status = wrapper_c2r(c2r_x, wk3, out_r) - - else if (format==PHYSICAL_IN_Z) then - - ! ===== 1D FFTs in X ===== - allocate(wk1(sp%xsz(1),sp%xsz(2),sp%xsz(3))) - ! status = DftiComputeBackward(c2c_x2, in_c(:,1,1), wk1(:,1,1)) - status = wrapper_c2c(c2c_x2, in_c, wk1, isign) - - ! ===== Swap X --> Y ===== - allocate (wk2(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - call transpose_x_to_y(wk1,wk2,sp) - - ! ===== 1D FFTs in Y ===== - allocate (wk2b(sp%ysz(1),sp%ysz(2),sp%ysz(3))) - do k=1,sp%ysz(3) - ! status = DftiComputeBackward(c2c_y2, wk2(:,1,k), wk2b(:,1,k)) - status = wrapper_c2c(c2c_y2, wk2(1,1,k), wk2b(1,1,k), isign) - end do - - ! ===== Swap Y --> Z ===== - allocate (wk3(sp%zsz(1),sp%zsz(2),sp%zsz(3))) - call transpose_y_to_z(wk2b,wk3,sp) - - ! ===== 1D FFTs in Z ===== - ! status = DftiComputeBackward(c2r_z, wk3(:,1,1), out_r(:,1,1)) - status = wrapper_c2r(c2r_z, wk3, out_r) - - end if - - return - end subroutine fft_3d_c2r - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Wrapper functions so that one can pass 3D arrays to DftiCompute - ! -- MKL accepts only 1D arrays as input/output for its multi- - ! dimensional FFTs. - ! -- Using EQUIVALENCE as suggested by MKL documents is impossible - ! for allocated arrays, not to mention bad coding style - ! -- All code commented out above may well work but not safe. There - ! is no guarantee that compiler wouldn't make copies of 1D arrays - ! (which would contain only one slice of the original 3D data) - ! rather than referring to the same memory address, i.e. 3D array - ! A and 1D array A(:,1,1) may refer to different memory location. - ! -- Using the following wrappers is safe and standard conforming. -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - integer function wrapper_c2c(desc, in, out, isign) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - complex(mytype), dimension(*) :: in, out - integer :: isign, status - - if (isign == DECOMP_2D_FFT_FORWARD) then - status = DftiComputeForward(desc, in, out) - else if (isign == DECOMP_2D_FFT_BACKWARD) then - status = DftiComputeBackward(desc, in, out) - end if - - wrapper_c2c = status - - return - end function wrapper_c2c - - integer function wrapper_r2c(desc, in, out) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - real(mytype), dimension(*) :: in - complex(mytype), dimension(*) :: out - - wrapper_r2c = DftiComputeForward(desc, in, out) - - return - end function wrapper_r2c - - integer function wrapper_c2r(desc, in, out) - - implicit none - - type(DFTI_DESCRIPTOR), pointer :: desc - complex(mytype), dimension(*) :: in - real(mytype), dimension(*) :: out - - wrapper_c2r = DftiComputeBackward(desc, in, out) - - return - end function wrapper_c2r - -end module decomp_2d_fft diff --git a/decomp2d/glassman.f90 b/decomp2d/glassman.f90 deleted file mode 100644 index 05545e8..0000000 --- a/decomp2d/glassman.f90 +++ /dev/null @@ -1,184 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This module contains a few 'generic' FFT routines, making the -! 2DECOMP&FFT library not dependent on any external libraries - -module glassman - - use decomp_2d, only : mytype - - implicit none - -contains - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Following is a FFT implementation based on algorithm proposed by - ! Glassman, a general FFT algorithm supporting arbitrary input length. - ! - ! W. E. Ferguson, Jr., "A simple derivation of Glassman general-n fast - ! Fourier transform," Comput. and Math. with Appls., vol. 8, no. 6, pp. - ! 401-411, 1982. - ! - ! Original implemtation online at http://www.jjj.de/fft/fftpage.html - ! - ! Updated - ! - to handle double-precision as well - ! - unnecessary scaling code removed -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - SUBROUTINE SPCFFT(U,N,ISIGN,WORK) - - IMPLICIT NONE - - LOGICAL :: INU - INTEGER :: A,B,C,N,I,ISIGN - COMPLEX(mytype) :: U(*),WORK(*) - - A = 1 - B = N - C = 1 - INU = .TRUE. - - DO WHILE ( B .GT. 1 ) - A = C * A - C = 2 - DO WHILE ( MOD(B,C) .NE. 0 ) - C = C + 1 - END DO - B = B / C - IF ( INU ) THEN - CALL SPCPFT (A,B,C,U,WORK,ISIGN) - ELSE - CALL SPCPFT (A,B,C,WORK,U,ISIGN) - END IF - INU = ( .NOT. INU ) - END DO - - IF ( .NOT. INU ) THEN - DO I = 1, N - U(I) = WORK(I) - END DO - END IF - - RETURN - END SUBROUTINE SPCFFT - - - SUBROUTINE SPCPFT( A, B, C, UIN, UOUT, ISIGN ) - - IMPLICIT NONE - - INTEGER :: ISIGN,A,B,C,IA,IB,IC,JCR,JC - - DOUBLE PRECISION :: ANGLE - - COMPLEX(mytype) :: UIN(B,C,A),UOUT(B,A,C),DELTA,OMEGA,SUM - - ANGLE = 6.28318530717958_mytype / REAL( A * C, kind=mytype ) - OMEGA = CMPLX( 1.0, 0.0, kind=mytype ) - - IF( ISIGN .EQ. 1 ) THEN - DELTA = CMPLX( DCOS(ANGLE), DSIN(ANGLE), kind=mytype ) - ELSE - DELTA = CMPLX( DCOS(ANGLE), -DSIN(ANGLE), kind=mytype ) - END IF - - DO IC = 1, C - DO IA = 1, A - DO IB = 1, B - SUM = UIN( IB, C, IA ) - DO JCR = 2, C - JC = C + 1 - JCR - SUM = UIN( IB, JC, IA ) + OMEGA * SUM - END DO - UOUT( IB, IA, IC ) = SUM - END DO - OMEGA = DELTA * OMEGA - END DO - END DO - - RETURN - END SUBROUTINE SPCPFT - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! A 3D real-to-complex routine implemented using the 1D FFT above - ! Input: nx*ny*nz real numbers - ! Output: (nx/2+1)*ny*nz complex numbers - ! Just like big FFT libraries (such as FFTW) do -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine glassman_3d_r2c(in_r,nx,ny,nz,out_c) - - implicit none - - integer, intent(IN) :: nx,ny,nz - real(mytype), dimension(nx,ny,nz) :: in_r - complex(mytype), dimension(nx/2+1,ny,nz) :: out_c - - complex(mytype), allocatable, dimension(:) :: buf, scratch - integer :: maxsize, i,j,k - - maxsize = max(nx, max(ny,nz)) - allocate(buf(maxsize)) - allocate(scratch(maxsize)) - - ! ===== 1D FFTs in X ===== - do k=1,nz - do j=1,ny - ! Glassman's 1D FFT is c2c only, - ! needing some pre- and post-processing for r2c - ! pack real input in complex storage - do i=1,nx - buf(i) = cmplx(in_r(i,j,k),0._mytype, kind=mytype) - end do - call spcfft(buf,nx,-1,scratch) - ! simply drop the redundant part of the complex output - do i=1,nx/2+1 - out_c(i,j,k) = buf(i) - end do - end do - end do - - ! ===== 1D FFTs in Y ===== - do k=1,nz - do i=1,nx/2+1 - do j=1,ny - buf(j) = out_c(i,j,k) - end do - call spcfft(buf,ny,-1,scratch) - do j=1,ny - out_c(i,j,k) = buf(j) - end do - end do - end do - - ! ===== 1D FFTs in Z ===== - do j=1,ny - do i=1,nx/2+1 - do k=1,nz - buf(k) = out_c(i,j,k) - end do - call spcfft(buf,nz,-1,scratch) - do k=1,nz - out_c(i,j,k) = buf(k) - end do - end do - end do - - deallocate(buf,scratch) - - return - end subroutine glassman_3d_r2c - - -end module glassman - diff --git a/decomp2d/halo.inc b/decomp2d/halo.inc deleted file mode 100644 index 9fd4b76..0000000 --- a/decomp2d/halo.inc +++ /dev/null @@ -1,115 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! Halo cell support for neighbouring pencils to exchange data -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine update_halo_real(in, out, level, opt_decomp, opt_global) - -implicit none - -integer, intent(IN) :: level ! levels of halo cells required -real(mytype), dimension(:,:,:), intent(IN) :: in -real(mytype), allocatable, dimension(:,:,:), intent(OUT) :: out -TYPE(DECOMP_INFO), optional :: opt_decomp -logical, optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global - -! starting/ending index of array with halo cells -integer :: xs, ys, zs, xe, ye, ze - -integer :: i, j, k, s1, s2, s3, ierror -integer :: data_type - -integer :: icount, ilength, ijump -integer :: halo12, halo21, halo31, halo32 -integer, dimension(4) :: requests -integer, dimension(MPI_STATUS_SIZE,4) :: status -integer :: tag_e, tag_w, tag_n, tag_s, tag_t, tag_b - -data_type = real_type - -#include "halo_common.inc" - -return -end subroutine update_halo_real - - -subroutine update_halo_complex(in, out, level, opt_decomp, opt_global) - -implicit none - -integer, intent(IN) :: level ! levels of halo cells required -complex(mytype), dimension(:,:,:), intent(IN) :: in -complex(mytype), allocatable, dimension(:,:,:), intent(OUT) :: out -TYPE(DECOMP_INFO), optional :: opt_decomp -logical, optional :: opt_global - -TYPE(DECOMP_INFO) :: decomp -logical :: global - -! starting/ending index of array with halo cells -integer :: xs, ys, zs, xe, ye, ze - -integer :: i, j, k, s1, s2, s3, ierror -integer :: data_type - -integer :: icount, ilength, ijump -integer :: halo12, halo21, halo31, halo32 -integer, dimension(4) :: requests -integer, dimension(MPI_STATUS_SIZE,4) :: status -integer :: tag_e, tag_w, tag_n, tag_s, tag_t, tag_b - -data_type = complex_type - -#include "halo_common.inc" - -return -end subroutine update_halo_complex - - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! To support halo-cell exchange: -! find the MPI ranks of neighbouring pencils -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -subroutine init_neighbour - -integer :: ierror - -! For X-pencil -neighbour(1,1) = MPI_PROC_NULL ! east -neighbour(1,2) = MPI_PROC_NULL ! west -call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_X, 0, 1, & -neighbour(1,4), neighbour(1,3), ierror) ! north & south -call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_X, 1, 1, & -neighbour(1,6), neighbour(1,5), ierror) ! top & bottom - -! For Y-pencil -call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Y, 0, 1, & -neighbour(2,2), neighbour(2,1), ierror) ! east & west -neighbour(2,3) = MPI_PROC_NULL ! north -neighbour(2,4) = MPI_PROC_NULL ! south -call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Y, 1, 1, & -neighbour(2,6), neighbour(2,5), ierror) ! top & bottom - -! For Z-pencil -call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Z, 0, 1, & -neighbour(3,2), neighbour(3,1), ierror) ! east & west -call MPI_CART_SHIFT(DECOMP_2D_COMM_CART_Z, 1, 1, & -neighbour(3,4), neighbour(3,3), ierror) ! north & south -neighbour(3,5) = MPI_PROC_NULL ! top -neighbour(3,6) = MPI_PROC_NULL ! bottom - -return -end subroutine init_neighbour diff --git a/decomp2d/halo_common.inc b/decomp2d/halo_common.inc deleted file mode 100644 index 1064f07..0000000 --- a/decomp2d/halo_common.inc +++ /dev/null @@ -1,425 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This file contain common code to be included by subroutines -! 'update_halo_...' in halo.inc - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -if (present(opt_global)) then -global = opt_global -else -global = .false. -end if - -s1 = size(in,1) -s2 = size(in,2) -s3 = size(in,3) - -! Calculate the starting index and ending index of output -if (s1==decomp%xsz(1)) then ! X-pencil input -if (global) then -xs = decomp%xst(1) -xe = decomp%xen(1) -ys = decomp%xst(2) - level -ye = decomp%xen(2) + level -zs = decomp%xst(3) - level -ze = decomp%xen(3) + level -else -xs = 1 -xe = s1 -ys = 1 - level -ye = s2 + level -zs = 1 - level -ze = s3 + level -end if -else if (s2==decomp%ysz(2)) then ! Y-pencil input -if (global) then -xs = decomp%yst(1) - level -xe = decomp%yen(1) + level -ys = decomp%yst(2) -ye = decomp%yen(2) -zs = decomp%yst(3) - level -ze = decomp%yen(3) + level -else -xs = 1 - level -xe = s1 + level -ys = 1 -ye = s2 -zs = 1 - level -ze = s3 + level -end if -else if (s3==decomp%zsz(3)) then ! Z-pencil input -if (global) then -xs = decomp%zst(1) - level -xe = decomp%zen(1) + level -ys = decomp%zst(2) - level -ye = decomp%zen(2) + level -zs = decomp%zst(3) -ze = decomp%zen(3) -else -xs = 1 - level -xe = s1 + level -ys = 1 - level -ye = s2 + level -zs = 1 -ze = s3 -end if -else -! invalid input -call decomp_2d_abort(10, & -'Invalid data passed to update_halo') -end if - - -allocate(out(xs:xe, ys:ye, zs:ze)) -! out = -1.0_mytype ! fill the halo for debugging - -! copy input data to output -if (global) then -! using global coordinate -! note the input array passed in always has index starting from 1 -! need to work out the corresponding global index -if (s1==decomp%xsz(1)) then -do k=decomp%xst(3),decomp%xen(3) -do j=decomp%xst(2),decomp%xen(2) -do i=1,s1 ! x all local -out(i,j,k) = in(i,j-decomp%xst(2)+1,k-decomp%xst(3)+1) -end do -end do -end do -else if (s2==decomp%ysz(2)) then -do k=decomp%yst(3),decomp%yen(3) -do j=1,s2 ! y all local -do i=decomp%yst(1),decomp%yen(1) -out(i,j,k) = in(i-decomp%yst(1)+1,j,k-decomp%yst(3)+1) -end do -end do -end do -else if (s3==decomp%zsz(3)) then -do k=1,s3 ! z all local -do j=decomp%zst(2),decomp%zen(2) -do i=decomp%zst(1),decomp%zen(1) -out(i,j,k) = in(i-decomp%zst(1)+1,j-decomp%zst(2)+1,k) -end do -end do -end do -end if -else -! not using global coordinate -do k=1,s3 -do j=1,s2 -do i=1,s1 -out(i,j,k) = in(i,j,k) -end do -end do -end do -end if - -! If needed, define MPI derived data type to pack halo data, -! then call MPI send/receive to exchange halo data - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! X-pencil -if (s1==decomp%xsz(1)) then - -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'X-pencil input' -write(*,*) '==============' -write(*,*) 'Data on a y-z plane is shown' -write(*,*) 'Before halo exchange' -do j=ye,ys,-1 -write(*,'(10F4.0)') (out(1,j,k),k=zs,ze) -end do -end if -#endif - -! *** east/west *** -! all data in local memory already, no halo exchange - -! *** north/south *** -tag_s = coord(1) -if (coord(1)==dims(1)-1 .AND. periodic_y) then -tag_n = 0 -else -tag_n = coord(1) + 1 -end if -icount = s3 + 2*level -ilength = level * s1 -ijump = s1*(s2+2*level) -call MPI_TYPE_VECTOR(icount, ilength, ijump, & -data_type, halo12, ierror) -call MPI_TYPE_COMMIT(halo12, ierror) -! receive from south -call MPI_IRECV(out(xs,ys,zs), 1, halo12, & -neighbour(1,4), tag_s, DECOMP_2D_COMM_CART_X, & -requests(1), ierror) -! receive from north -call MPI_IRECV(out(xs,ye-level+1,zs), 1, halo12, & -neighbour(1,3), tag_n, DECOMP_2D_COMM_CART_X, & -requests(2), ierror) -! send to south -call MPI_ISSEND(out(xs,ys+level,zs), 1, halo12, & -neighbour(1,4), tag_s, DECOMP_2D_COMM_CART_X, & -requests(3), ierror) -! send to north -call MPI_ISSEND(out(xs,ye-level-level+1,zs), 1, halo12, & -neighbour(1,3), tag_n, DECOMP_2D_COMM_CART_X, & -requests(4), ierror) -call MPI_WAITALL(4, requests, status, ierror) -call MPI_TYPE_FREE(halo12, ierror) -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'After exchange in Y' -do j=ye,ys,-1 -write(*,'(10F4.0)') (out(1,j,k),k=zs,ze) -end do -end if -#endif - -! *** top/bottom *** -! no need to define derived data type as data on xy-planes -! all contiguous in memory, which can be sent/received using -! MPI directly -tag_b = coord(2) -if (coord(2)==dims(2)-1 .AND. periodic_z) then -tag_t = 0 -else -tag_t = coord(2) + 1 -end if -icount = (s1 * (s2+2*level)) * level -! receive from bottom -call MPI_IRECV(out(xs,ys,zs), icount, data_type, & -neighbour(1,6), tag_b, DECOMP_2D_COMM_CART_X, & -requests(1), ierror) -! receive from top -call MPI_IRECV(out(xs,ys,ze-level+1), icount, data_type, & -neighbour(1,5), tag_t, DECOMP_2D_COMM_CART_X, & -requests(2), ierror) -! send to bottom -call MPI_ISSEND(out(xs,ys,zs+level), icount, data_type, & -neighbour(1,6), tag_b, DECOMP_2D_COMM_CART_X, & -requests(3), ierror) -! send to top -call MPI_ISSEND(out(xs,ys,ze-level-level+1), icount, data_type, & -neighbour(1,5), tag_t, DECOMP_2D_COMM_CART_X, & -requests(4), ierror) -call MPI_WAITALL(4, requests, status, ierror) -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'After exchange in Z' -do j=ye,ys,-1 -write(*,'(10F4.0)') (out(1,j,k),k=zs,ze) -end do -end if -#endif - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! Y-pencil -else if (s2==decomp%ysz(2)) then - -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'Y-pencil input' -write(*,*) '==============' -write(*,*) 'Data on a x-z plane is shown' -write(*,*) 'Before halo exchange' -do i=xe,xs,-1 -write(*,'(10F4.0)') (out(i,1,k),k=zs,ze) -end do -end if -#endif - -! *** east/west *** -tag_w = coord(1) -if (coord(1)==dims(1)-1 .AND. periodic_x) then -tag_e = 0 -else -tag_e = coord(1) + 1 -end if -icount = s2*(s3+2*level) -ilength = level -ijump = s1+2*level -call MPI_TYPE_VECTOR(icount, ilength, ijump, & -data_type, halo21, ierror) -call MPI_TYPE_COMMIT(halo21, ierror) -! receive from west -call MPI_IRECV(out(xs,ys,zs), 1, halo21, & -neighbour(2,2), tag_w, DECOMP_2D_COMM_CART_Y, & -requests(1), ierror) -! receive from east -call MPI_IRECV(out(xe-level+1,ys,zs), 1, halo21, & -neighbour(2,1), tag_e, DECOMP_2D_COMM_CART_Y, & -requests(2), ierror) -! send to west -call MPI_ISSEND(out(xs+level,ys,zs), 1, halo21, & -neighbour(2,2), tag_w, DECOMP_2D_COMM_CART_Y, & -requests(3), ierror) -! send to east -call MPI_ISSEND(out(xe-level-level+1,ys,zs), 1, halo21, & -neighbour(2,1), tag_e, DECOMP_2D_COMM_CART_Y, & -requests(4), ierror) -call MPI_WAITALL(4, requests, status, ierror) -call MPI_TYPE_FREE(halo21, ierror) -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'After exchange in X' -do i=xe,xs,-1 -write(*,'(10F4.0)') (out(i,1,k),k=zs,ze) -end do -end if -#endif - -! *** north/south *** -! all data in local memory already, no halo exchange - -! *** top/bottom *** -! no need to define derived data type as data on xy-planes -! all contiguous in memory, which can be sent/received using -! MPI directly -tag_b = coord(2) -if (coord(2)==dims(2)-1 .AND. periodic_z) then -tag_t = 0 -else -tag_t = coord(2) + 1 -end if -icount = (s2 * (s1+2*level)) * level -! receive from bottom -call MPI_IRECV(out(xs,ys,zs), icount, data_type, & -neighbour(2,6), tag_b, DECOMP_2D_COMM_CART_Y, & -requests(1), ierror) -! receive from top -call MPI_IRECV(out(xs,ys,ze-level+1), icount, data_type, & -neighbour(2,5), tag_t, DECOMP_2D_COMM_CART_Y, & -requests(2), ierror) -! send to bottom -call MPI_ISSEND(out(xs,ys,zs+level), icount, data_type, & -neighbour(2,6), tag_b, DECOMP_2D_COMM_CART_Y, & -requests(3), ierror) -! send to top -call MPI_ISSEND(out(xs,ys,ze-level-level+1), icount, data_type, & -neighbour(2,5), tag_t, DECOMP_2D_COMM_CART_Y, & -requests(4), ierror) -call MPI_WAITALL(4, requests, status, ierror) -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'After exchange in Z' -do i=xe,xs,-1 -write(*,'(10F4.0)') (out(i,1,k),k=zs,ze) -end do -end if -#endif - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -! Z-pencil -else if (s3==decomp%zsz(3)) then - -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'Z-pencil input' -write(*,*) '==============' -write(*,*) 'Data on a x-y plane is shown' -write(*,*) 'Before halo exchange' -do i=xe,xs,-1 -write(*,'(10F4.0)') (out(i,j,1),j=ys,ye) -end do -end if -#endif - -! *** east/west *** -tag_w = coord(1) -if (coord(1)==dims(1)-1 .AND. periodic_x) then -tag_e = 0 -else -tag_e = coord(1) + 1 -end if -icount = (s2+2*level)*s3 -ilength = level -ijump = s1+2*level -call MPI_TYPE_VECTOR(icount, ilength, ijump, & -data_type, halo31, ierror) -call MPI_TYPE_COMMIT(halo31, ierror) -! receive from west -call MPI_IRECV(out(xs,ys,zs), 1, halo31, & -neighbour(3,2), tag_w, DECOMP_2D_COMM_CART_Z, & -requests(1), ierror) -! receive from east -call MPI_IRECV(out(xe-level+1,ys,zs), 1, halo31, & -neighbour(3,1), tag_e, DECOMP_2D_COMM_CART_Z, & -requests(2), ierror) -! send to west -call MPI_ISSEND(out(xs+level,ys,zs), 1, halo31, & -neighbour(3,2), tag_w, DECOMP_2D_COMM_CART_Z, & -requests(3), ierror) -! send to east -call MPI_ISSEND(out(xe-level-level+1,ys,zs), 1, halo31, & -neighbour(3,1), tag_e, DECOMP_2D_COMM_CART_Z, & -requests(4), ierror) -call MPI_WAITALL(4, requests, status, ierror) -call MPI_TYPE_FREE(halo31, ierror) -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'After exchange in X' -do i=xe,xs,-1 -write(*,'(10F4.0)') (out(i,j,1),j=ys,ye) -end do -end if -#endif - -! *** north/south *** -tag_s = coord(2) -if (coord(2)==dims(2)-1 .AND. periodic_y) then -tag_n = 0 -else -tag_n = coord(2) + 1 -end if -icount = s3 -ilength = level * (s1+2*level) -ijump = (s1+2*level) * (s2+2*level) -call MPI_TYPE_VECTOR(icount, ilength, ijump, & -data_type, halo32, ierror) -call MPI_TYPE_COMMIT(halo32, ierror) -! receive from south -call MPI_IRECV(out(xs,ys,zs), 1, halo32, & -neighbour(3,4), tag_s, DECOMP_2D_COMM_CART_Z, & -requests(1), ierror) -! receive from north -call MPI_IRECV(out(xs,ye-level+1,zs), 1, halo32, & -neighbour(3,3), tag_n, DECOMP_2D_COMM_CART_Z, & -requests(2), ierror) -! send to south -call MPI_ISSEND(out(xs,ys+level,zs), 1, halo32, & -neighbour(3,4), tag_s, DECOMP_2D_COMM_CART_Z, & -requests(3), ierror) -! send to north -call MPI_ISSEND(out(xs,ye-level-level+1,zs), 1, halo32, & -neighbour(3,3), tag_n, DECOMP_2D_COMM_CART_Z, & -requests(4), ierror) -call MPI_WAITALL(4, requests, status, ierror) -call MPI_TYPE_FREE(halo32, ierror) -#ifdef HALO_DEBUG -if (nrank==4) then -write(*,*) 'After exchange in Y' -do i=xe,xs,-1 -write(*,'(10F4.0)') (out(i,j,1),j=ys,ye) -end do -end if -#endif - -! *** top/bottom *** -! all data in local memory already, no halo exchange - -end if ! pencil diff --git a/decomp2d/io.f90 b/decomp2d/io.f90 deleted file mode 100644 index 4e57021..0000000 --- a/decomp2d/io.f90 +++ /dev/null @@ -1,1041 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2013 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This module provides parallel IO facilities for applications based on -! 2D decomposition. - -module decomp_2d_io - - use decomp_2d - use MPI -#ifdef T3PIO - use t3pio -#endif - - implicit none - - private ! Make everything private unless declared public - - public :: decomp_2d_write_one, decomp_2d_read_one, & - decomp_2d_write_var, decomp_2d_read_var, & - decomp_2d_write_scalar, decomp_2d_read_scalar, & - decomp_2d_write_plane, decomp_2d_write_every, & - decomp_2d_write_subdomain - - ! Generic interface to handle multiple data types - - interface decomp_2d_write_one - module procedure write_one_real - module procedure write_one_complex - module procedure mpiio_write_real_coarse - module procedure mpiio_write_real_probe - end interface decomp_2d_write_one - - interface decomp_2d_read_one - module procedure read_one_real - module procedure read_one_complex - end interface decomp_2d_read_one - - interface decomp_2d_write_var - module procedure write_var_real - module procedure write_var_complex - end interface decomp_2d_write_var - - interface decomp_2d_read_var - module procedure read_var_real - module procedure read_var_complex - end interface decomp_2d_read_var - - interface decomp_2d_write_scalar - module procedure write_scalar_real - module procedure write_scalar_complex - module procedure write_scalar_integer - module procedure write_scalar_logical - end interface decomp_2d_write_scalar - - interface decomp_2d_read_scalar - module procedure read_scalar_real - module procedure read_scalar_complex - module procedure read_scalar_integer - module procedure read_scalar_logical - end interface decomp_2d_read_scalar - - interface decomp_2d_write_plane - module procedure write_plane_3d_real - module procedure write_plane_3d_complex - ! module procedure write_plane_2d - end interface decomp_2d_write_plane - - interface decomp_2d_write_every - module procedure write_every_real - module procedure write_every_complex - end interface decomp_2d_write_every - - interface decomp_2d_write_subdomain - module procedure write_subdomain - end interface decomp_2d_write_subdomain - -contains - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Using MPI-IO library to write a single 3D array to a file -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine write_one_real(ipencil,var,filename,opt_decomp) - - implicit none - - integer, intent(IN) :: ipencil - real(mytype), dimension(:,:,:), intent(IN) :: var - character(len=*), intent(IN) :: filename - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer(kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, fh, data_type, info, gs - - data_type = real_type - -#include "io_write_one.inc" - - return - end subroutine write_one_real - - - subroutine write_one_complex(ipencil,var,filename,opt_decomp) - - implicit none - - integer, intent(IN) :: ipencil - complex(mytype), dimension(:,:,:), intent(IN) :: var - character(len=*), intent(IN) :: filename - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer(kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, fh, data_type, info, gs - - data_type = complex_type - -#include "io_write_one.inc" - - return - end subroutine write_one_complex - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Using MPI-IO library to read from a file a single 3D array -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine read_one_real(ipencil,var,filename,opt_decomp) - - implicit none - - integer, intent(IN) :: ipencil - real(mytype), dimension(:,:,:), intent(INOUT) :: var - character(len=*), intent(IN) :: filename - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer(kind=MPI_OFFSET_KIND) :: disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, fh, data_type - real(mytype_single), allocatable, dimension(:,:,:) :: varsingle - - data_type = real_type_single - allocate (varsingle(xstV(1):xenV(1),xstV(2):xenV(2),xstV(3):xenV(3))) - - if (present(opt_decomp)) then - decomp = opt_decomp - else - call get_decomp_info(decomp) - end if - - ! determine subarray parameters - sizes(1) = decomp%xsz(1) - sizes(2) = decomp%ysz(2) - sizes(3) = decomp%zsz(3) - - if (ipencil == 1) then - subsizes(1) = decomp%xsz(1) - subsizes(2) = decomp%xsz(2) - subsizes(3) = decomp%xsz(3) - starts(1) = decomp%xst(1)-1 ! 0-based index - starts(2) = decomp%xst(2)-1 - starts(3) = decomp%xst(3)-1 - else if (ipencil == 2) then - subsizes(1) = decomp%ysz(1) - subsizes(2) = decomp%ysz(2) - subsizes(3) = decomp%ysz(3) - starts(1) = decomp%yst(1)-1 - starts(2) = decomp%yst(2)-1 - starts(3) = decomp%yst(3)-1 - else if (ipencil == 3) then - subsizes(1) = decomp%zsz(1) - subsizes(2) = decomp%zsz(2) - subsizes(3) = decomp%zsz(3) - starts(1) = decomp%zst(1)-1 - starts(2) = decomp%zst(2)-1 - starts(3) = decomp%zst(3)-1 - endif - - call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts, & - MPI_ORDER_FORTRAN, data_type, newtype, ierror) - call MPI_TYPE_COMMIT(newtype,ierror) - call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, & - MPI_MODE_RDONLY, MPI_INFO_NULL, & - fh, ierror) - disp = 0_MPI_OFFSET_KIND - call MPI_FILE_SET_VIEW(fh,disp,data_type, & - newtype,'native',MPI_INFO_NULL,ierror) - call MPI_FILE_READ_ALL(fh, varsingle, & - subsizes(1)*subsizes(2)*subsizes(3), & - data_type, MPI_STATUS_IGNORE, ierror) - call MPI_FILE_CLOSE(fh,ierror) - call MPI_TYPE_FREE(newtype,ierror) - var = real(varsingle,mytype) - deallocate(varsingle) - - return - end subroutine read_one_real - - - subroutine read_one_complex(ipencil,var,filename,opt_decomp) - - implicit none - - integer, intent(IN) :: ipencil - complex(mytype), dimension(:,:,:), intent(INOUT) :: var - character(len=*), intent(IN) :: filename - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer(kind=MPI_OFFSET_KIND) :: disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, fh, data_type - - data_type = complex_type - -#include "io_read_one.inc" - - return - end subroutine read_one_complex - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Write a 3D array as part of a big MPI-IO file, starting from - ! displacement 'disp'; 'disp' will be updated after the writing - ! operation to prepare the writing of next chunk of data. -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine write_var_real(fh,disp,ipencil,var,opt_decomp) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: ipencil - real(mytype), dimension(:,:,:), intent(IN) :: var - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, data_type - - data_type = real_type - -#include "io_write_var.inc" - - return - end subroutine write_var_real - - - subroutine write_var_complex(fh,disp,ipencil,var,opt_decomp) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: ipencil - complex(mytype), dimension(:,:,:), intent(IN) :: var - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, data_type - - data_type = complex_type - -#include "io_write_var.inc" - - return - end subroutine write_var_complex - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Read a 3D array as part of a big MPI-IO file, starting from - ! displacement 'disp'; 'disp' will be updated after the reading - ! operation to prepare the reading of next chunk of data. -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine read_var_real(fh,disp,ipencil,var,opt_decomp) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: ipencil - real(mytype), dimension(:,:,:), intent(INOUT) :: var - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, data_type - - data_type = real_type - -#include "io_read_var.inc" - - return - end subroutine read_var_real - - - subroutine read_var_complex(fh,disp,ipencil,var,opt_decomp) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: ipencil - complex(mytype), dimension(:,:,:), intent(INOUT) :: var - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - integer, dimension(3) :: sizes, subsizes, starts - integer :: ierror, newtype, data_type - - data_type = complex_type - -#include "io_read_var.inc" - - return - end subroutine read_var_complex - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Write scalar variables as part of a big MPI-IO file, starting from - ! displacement 'disp'; 'disp' will be updated after the reading - ! operation to prepare the reading of next chunk of data. -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine write_scalar_real(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh ! file handle - integer(KIND=MPI_OFFSET_KIND), & - intent(INOUT) :: disp ! displacement - integer, intent(IN) :: n ! number of scalars - real(mytype), dimension(n), & - intent(IN) :: var ! array of scalars - - integer :: m, ierror - - call MPI_FILE_SET_VIEW(fh,disp,real_type, & - real_type,'native',MPI_INFO_NULL,ierror) - if (nrank==0) then - m = n ! only one rank needs to write - else - m = 0 - end if - call MPI_FILE_WRITE_ALL(fh, var, m, real_type, & - MPI_STATUS_IGNORE, ierror) - disp = disp + n*mytype_bytes - - return - end subroutine write_scalar_real - - - subroutine write_scalar_complex(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: n - complex(mytype), dimension(n), intent(IN) :: var - - integer :: m, ierror - - call MPI_FILE_SET_VIEW(fh,disp,complex_type, & - complex_type,'native',MPI_INFO_NULL,ierror) - if (nrank==0) then - m = n - else - m = 0 - end if - call MPI_FILE_WRITE_ALL(fh, var, m, complex_type, & - MPI_STATUS_IGNORE, ierror) - disp = disp + n*mytype_bytes*2 - - return - end subroutine write_scalar_complex - - - subroutine write_scalar_integer(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: n - integer, dimension(n), intent(IN) :: var - - integer :: m, ierror - - call MPI_FILE_SET_VIEW(fh,disp,MPI_INTEGER, & - MPI_INTEGER,'native',MPI_INFO_NULL,ierror) - if (nrank==0) then - m = n - else - m = 0 - end if - call MPI_FILE_WRITE_ALL(fh, var, m, MPI_INTEGER, & - MPI_STATUS_IGNORE, ierror) - call MPI_TYPE_SIZE(MPI_INTEGER,m,ierror) - disp = disp + n*m - - return - end subroutine write_scalar_integer - - - subroutine write_scalar_logical(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: n - logical, dimension(n), intent(IN) :: var - - integer :: m, ierror - - call MPI_FILE_SET_VIEW(fh,disp,MPI_LOGICAL, & - MPI_LOGICAL,'native',MPI_INFO_NULL,ierror) - if (nrank==0) then - m = n - else - m = 0 - end if - call MPI_FILE_WRITE_ALL(fh, var, m, MPI_LOGICAL, & - MPI_STATUS_IGNORE, ierror) - call MPI_TYPE_SIZE(MPI_LOGICAL,m,ierror) - disp = disp + n*m - - return - end subroutine write_scalar_logical - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Read scalar variables as part of a big MPI-IO file, starting from - ! displacement 'disp'; 'disp' will be updated after the reading - ! operation to prepare the reading of next chunk of data. -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine read_scalar_real(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh ! file handle - integer(KIND=MPI_OFFSET_KIND), & - intent(INOUT) :: disp ! displacement - integer, intent(IN) :: n ! number of scalars - real(mytype), dimension(n), & - intent(INOUT) :: var ! array of scalars - - integer :: ierror - - call MPI_FILE_SET_VIEW(fh,disp,real_type, & - real_type,'native',MPI_INFO_NULL,ierror) - call MPI_FILE_READ_ALL(fh, var, n, real_type, & - MPI_STATUS_IGNORE, ierror) - disp = disp + n*mytype_bytes - - return - end subroutine read_scalar_real - - - subroutine read_scalar_complex(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: n - complex(mytype), dimension(n), intent(INOUT) :: var - - integer :: ierror - - call MPI_FILE_SET_VIEW(fh,disp,complex_type, & - complex_type,'native',MPI_INFO_NULL,ierror) - call MPI_FILE_READ_ALL(fh, var, n, complex_type, & - MPI_STATUS_IGNORE, ierror) - disp = disp + n*mytype_bytes*2 - - return - end subroutine read_scalar_complex - - - subroutine read_scalar_integer(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: n - integer, dimension(n), intent(INOUT) :: var - - integer :: m, ierror - - call MPI_FILE_SET_VIEW(fh,disp,MPI_INTEGER, & - MPI_INTEGER,'native',MPI_INFO_NULL,ierror) - call MPI_FILE_READ_ALL(fh, var, n, MPI_INTEGER, & - MPI_STATUS_IGNORE, ierror) - call MPI_TYPE_SIZE(MPI_INTEGER,m,ierror) - disp = disp + n*m - - return - end subroutine read_scalar_integer - - - subroutine read_scalar_logical(fh,disp,n,var) - - implicit none - - integer, intent(IN) :: fh - integer(KIND=MPI_OFFSET_KIND), intent(INOUT) :: disp - integer, intent(IN) :: n - logical, dimension(n), intent(INOUT) :: var - - integer :: m, ierror - - call MPI_FILE_SET_VIEW(fh,disp,MPI_LOGICAL, & - MPI_LOGICAL,'native',MPI_INFO_NULL,ierror) - call MPI_FILE_READ_ALL(fh, var, n, MPI_LOGICAL, & - MPI_STATUS_IGNORE, ierror) - call MPI_TYPE_SIZE(MPI_LOGICAL,m,ierror) - disp = disp + n*m - - return - end subroutine read_scalar_logical - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Write a 2D slice of the 3D data to a file -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine write_plane_3d_real(ipencil,var,iplane,n,filename, & - opt_decomp) - - implicit none - - integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - real(mytype), dimension(:,:,:), intent(IN) :: var - integer, intent(IN) :: iplane !(x-plane=1; y-plane=2; z-plane=3) - integer, intent(IN) :: n ! which plane to write (global coordinate) - character(len=*), intent(IN) :: filename - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - real(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - real(mytype), allocatable, dimension(:,:,:) :: wk2d - TYPE(DECOMP_INFO) :: decomp - integer(kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: i,j,k, ierror, newtype, fh, data_type - - data_type = real_type - -#include "io_write_plane.inc" - - return - end subroutine write_plane_3d_real - - - subroutine write_plane_3d_complex(ipencil,var,iplane,n, & - filename,opt_decomp) - - implicit none - - integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - complex(mytype), dimension(:,:,:), intent(IN) :: var - integer, intent(IN) :: iplane !(x-plane=1; y-plane=2; z-plane=3) - integer, intent(IN) :: n ! which plane to write (global coordinate) - character(len=*), intent(IN) :: filename - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - complex(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - complex(mytype), allocatable, dimension(:,:,:) :: wk2d - TYPE(DECOMP_INFO) :: decomp - integer(kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: i,j,k, ierror, newtype, fh, data_type - - data_type = complex_type - -#include "io_write_plane.inc" - - return - end subroutine write_plane_3d_complex - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Write a 2D array to a file -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - !************** TO DO *************** - !* Consider handling distributed 2D data set - ! subroutine write_plane_2d(ipencil,var,filename) - ! integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - ! real(mytype), dimension(:,:), intent(IN) :: var ! 2D array - ! character(len=*), intent(IN) :: filename - ! - ! if (ipencil==1) then - ! ! var should be defined as var(xsize(2) - ! - ! else if (ipencil==2) then - ! - ! else if (ipencil==3) then - ! - ! end if - ! - ! return - ! end subroutine write_plane_2d - - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Write 3D array data for every specified mesh point -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine write_every_real(ipencil,var,iskip,jskip,kskip, & - filename, from1) - - implicit none - - integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - real(mytype), dimension(:,:,:), intent(IN) :: var - integer, intent(IN) :: iskip,jskip,kskip - character(len=*), intent(IN) :: filename - logical, intent(IN) :: from1 ! .true. - save 1,n+1,2n+1... - ! .false. - save n,2n,3n... - - real(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - integer(kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: i,j,k, ierror, newtype, fh, key,color,newcomm, data_type - integer, dimension(3) :: xsz,ysz,zsz,xst,yst,zst,xen,yen,zen,skip - - data_type = real_type - -#include "io_write_every.inc" - - return - end subroutine write_every_real - - - subroutine write_every_complex(ipencil,var,iskip,jskip,kskip, & - filename, from1) - - implicit none - - integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - complex(mytype), dimension(:,:,:), intent(IN) :: var - integer, intent(IN) :: iskip,jskip,kskip - character(len=*), intent(IN) :: filename - logical, intent(IN) :: from1 ! .true. - save 1,n+1,2n+1... - ! .false. - save n,2n,3n... - - complex(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - integer(kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: i,j,k, ierror, newtype, fh, key,color,newcomm, data_type - integer, dimension(3) :: xsz,ysz,zsz,xst,yst,zst,xen,yen,zen,skip - - data_type = complex_type - -#include "io_write_every.inc" - - return - end subroutine write_every_complex - - - subroutine mpiio_write_real_coarse(ipencil,var,filename,icoarse) - - ! USE param - ! USE variables - - implicit none - - integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - integer, intent(IN) :: icoarse !(nstat=1; nvisu=2) - real(mytype), dimension(:,:,:), intent(IN) :: var - real(mytype_single), allocatable, dimension(:,:,:) :: varsingle - character(len=*) :: filename - - integer (kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: i,j,k, ierror, newtype, fh - - if (icoarse==1) then - sizes(1) = xszS(1) - sizes(2) = yszS(2) - sizes(3) = zszS(3) - - if (ipencil == 1) then - subsizes(1) = xszS(1) - subsizes(2) = xszS(2) - subsizes(3) = xszS(3) - starts(1) = xstS(1)-1 ! 0-based index - starts(2) = xstS(2)-1 - starts(3) = xstS(3)-1 - else if (ipencil == 2) then - subsizes(1) = yszS(1) - subsizes(2) = yszS(2) - subsizes(3) = yszS(3) - starts(1) = ystS(1)-1 - starts(2) = ystS(2)-1 - starts(3) = ystS(3)-1 - else if (ipencil == 3) then - subsizes(1) = zszS(1) - subsizes(2) = zszS(2) - subsizes(3) = zszS(3) - starts(1) = zstS(1)-1 - starts(2) = zstS(2)-1 - starts(3) = zstS(3)-1 - endif - endif - - if (icoarse==2) then - sizes(1) = xszV(1) - sizes(2) = yszV(2) - sizes(3) = zszV(3) - - if (ipencil == 1) then - subsizes(1) = xszV(1) - subsizes(2) = xszV(2) - subsizes(3) = xszV(3) - starts(1) = xstV(1)-1 ! 0-based index - starts(2) = xstV(2)-1 - starts(3) = xstV(3)-1 - else if (ipencil == 2) then - subsizes(1) = yszV(1) - subsizes(2) = yszV(2) - subsizes(3) = yszV(3) - starts(1) = ystV(1)-1 - starts(2) = ystV(2)-1 - starts(3) = ystV(3)-1 - else if (ipencil == 3) then - subsizes(1) = zszV(1) - subsizes(2) = zszV(2) - subsizes(3) = zszV(3) - starts(1) = zstV(1)-1 - starts(2) = zstV(2)-1 - starts(3) = zstV(3)-1 - endif - endif - - allocate (varsingle(xstV(1):xenV(1),xstV(2):xenV(2),xstV(3):xenV(3))) - varsingle=var - call MPI_TYPE_CREATE_SUBARRAY(3, sizes, subsizes, starts, & - MPI_ORDER_FORTRAN, real_type_single, newtype, ierror) - call MPI_TYPE_COMMIT(newtype,ierror) - call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, & - MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, & - fh, ierror) - filesize = 0_MPI_OFFSET_KIND - call MPI_FILE_SET_SIZE(fh,filesize,ierror) ! guarantee overwriting - disp = 0_MPI_OFFSET_KIND - call MPI_FILE_SET_VIEW(fh,disp,real_type_single, & - newtype,'native',MPI_INFO_NULL,ierror) - call MPI_FILE_WRITE_ALL(fh, varsingle, & - subsizes(1)*subsizes(2)*subsizes(3), & - real_type_single, MPI_STATUS_IGNORE, ierror) - call MPI_FILE_CLOSE(fh,ierror) - call MPI_TYPE_FREE(newtype,ierror) - deallocate(varsingle) - - return - end subroutine mpiio_write_real_coarse - - subroutine mpiio_write_real_probe(ipencil,var,filename,nlength) - - ! USE param - ! USE variables - - implicit none - - integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - integer, intent(in) :: nlength - real(mytype), dimension(:,:,:,:), intent(IN) :: var - - character(len=*) :: filename - - integer (kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(4) :: sizes, subsizes, starts - integer :: i,j,k, ierror, newtype, fh - - - sizes(1) = xszP(1) - sizes(2) = yszP(2) - sizes(3) = zszP(3) - sizes(4) = nlength - if (ipencil == 1) then - subsizes(1) = xszP(1) - subsizes(2) = xszP(2) - subsizes(3) = xszP(3) - subsizes(4) = nlength - starts(1) = xstP(1)-1 ! 0-based index - starts(2) = xstP(2)-1 - starts(3) = xstP(3)-1 - starts(4) = 0 - else if (ipencil == 2) then - subsizes(1) = yszP(1) - subsizes(2) = yszP(2) - subsizes(3) = yszP(3) - starts(1) = ystP(1)-1 - starts(2) = ystP(2)-1 - starts(3) = ystP(3)-1 - else if (ipencil == 3) then - subsizes(1) = zszP(1) - subsizes(2) = zszP(2) - subsizes(3) = zszP(3) - starts(1) = zstP(1)-1 - starts(2) = zstP(2)-1 - starts(3) = zstP(3)-1 - endif - ! print *,nrank,starts(1),starts(2),starts(3),starts(4) - call MPI_TYPE_CREATE_SUBARRAY(4, sizes, subsizes, starts, & - MPI_ORDER_FORTRAN, real_type, newtype, ierror) - call MPI_TYPE_COMMIT(newtype,ierror) - call MPI_FILE_OPEN(MPI_COMM_WORLD, filename, & - MPI_MODE_CREATE+MPI_MODE_WRONLY, MPI_INFO_NULL, & - fh, ierror) - filesize = 0_MPI_OFFSET_KIND - call MPI_FILE_SET_SIZE(fh,filesize,ierror) ! guarantee overwriting - disp = 0_MPI_OFFSET_KIND - call MPI_FILE_SET_VIEW(fh,disp,real_type, & - newtype,'native',MPI_INFO_NULL,ierror) - call MPI_FILE_WRITE_ALL(fh, var, & - subsizes(1)*subsizes(2)*subsizes(3)*subsizes(4), & - real_type, MPI_STATUS_IGNORE, ierror) - call MPI_FILE_CLOSE(fh,ierror) - call MPI_TYPE_FREE(newtype,ierror) - - - return - end subroutine mpiio_write_real_probe - -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - ! Write a 3D data set covering a smaller sub-domain only -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - subroutine write_subdomain(ipencil,var,is,ie,js,je,ks,ke,filename) - - implicit none - - integer, intent(IN) :: ipencil !(x-pencil=1; y-pencil=2; z-pencil=3) - real(mytype), dimension(:,:,:), intent(IN) :: var - integer, intent(IN) :: is, ie, js, je, ks, ke - character(len=*), intent(IN) :: filename - - real(mytype), allocatable, dimension(:,:,:) :: wk, wk2 - integer(kind=MPI_OFFSET_KIND) :: filesize, disp - integer, dimension(3) :: sizes, subsizes, starts - integer :: color, key, errorcode, newcomm, ierror - integer :: newtype, fh, data_type, i, j, k - integer :: i1, i2, j1, j2, k1, k2 - - data_type = real_type - - ! validate the input paramters - if (is<1 .OR. ie>nx_global .OR. js<1 .OR. je>ny_global .OR. & - ks<1 .OR. ke>nz_global) then - errorcode = 10 - call decomp_2d_abort(errorcode, & - 'Invalid subdomain specified in I/O') - end if - - ! create a communicator for all those MPI ranks containing the subdomain - color = 1 - key = 1 - if (ipencil==1) then - if (xstart(1)>ie .OR. xend(1)je .OR. xend(2)ke .OR. xend(3)ie .OR. yend(1)je .OR. yend(2)ke .OR. yend(3)ie .OR. zend(1)je .OR. zend(2)ke .OR. zend(3)ie .AND. xstart(1)ie) then - subsizes(1) = ie - xstart(1) + 1 - end if - subsizes(2) = xsize(2) - starts(2) = xstart(2) - js - if (xend(2)>je .AND. xstart(2)je) then - subsizes(2) = je - xstart(2) + 1 - end if - subsizes(3) = xsize(3) - starts(3) = xstart(3) - ks - if (xend(3)>ke .AND. xstart(3)ke) then - subsizes(3) = ke - xstart(3) + 1 - end if - - else if (ipencil==2) then - - ! TODO - - else if (ipencil==3) then - - ! TODO - - end if - - - ! copy data from orginal to a temp array - ! pay attention to blocks only partially cover the sub-domain - if (ipencil==1) then - - if (xend(1)>ie .AND. xstart(1)ie) then - i1 = xstart(1) - i2 = ie - else if (xstart(1)je .AND. xstart(2)je) then - j1 = xstart(2) - j2 = je - else if (xstart(2)ke .AND. xstart(3)ke) then - k1 = xstart(3) - k2 = ke - else if (xstart(3) receive buffer - ! out --> destination array - ! pos --> pointer for the receive buffer - ! - for normal ALLTOALLV, points to the beginning of receive buffer (=1) - ! - for shared memory code, note the receive buffer is shared by all cores - ! on same node, so 'pos' points to the correct location for this core - - integer, intent(IN) :: ndir - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - -#ifndef SHM - pos = 1 -#endif - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - endif - - if (ndir==1) then -#ifdef SHM - pos = decomp%y1disp_o(m) + 1 -#endif - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - enddo - enddo - enddo - else if (ndir==2) then -#ifdef SHM - pos = decomp%z2disp_o(m) + 1 -#endif - do k=i1,i2 - do j=1,n2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - enddo - enddo - enddo - else if (ndir==3) then -#ifdef SHM - pos = decomp%y2disp_o(m) + 1 -#endif - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - enddo - enddo - enddo - else if (ndir==4) then -#ifdef SHM - pos = decomp%x1disp_o(m) + 1 -#endif - do k=1,n3 - do j=1,n2 - do i=i1,i2 - out(i,j,k) = in(pos) - pos = pos + 1 - enddo - enddo - enddo - endif - enddo diff --git a/decomp2d/mem_split.f90 b/decomp2d/mem_split.f90 deleted file mode 100644 index bb56c95..0000000 --- a/decomp2d/mem_split.f90 +++ /dev/null @@ -1,92 +0,0 @@ - !======================================================================= - ! This is part of the 2DECOMP&FFT library - ! - ! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) - ! decomposition. It also implements a highly scalable distributed - ! three-dimensional Fast Fourier Transform (FFT). - ! - ! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) - ! - !======================================================================= - - ! This file contain duplicated code that gathers data from source to - ! MPI_ALLTOALLV send buffer. It is 'included' by two subroutines in - ! decomp_2d.f90 - - ! Note: - ! in --> source array - ! out --> send buffer - ! pos --> pointer for the send buffer - ! - for normal ALLTOALLV, points to the beginning of send buffer (=1) - ! - for shared memory code, note the send buffer is shared by all cores - ! on same node, so 'pos' points to the correct location for this core - - integer, intent(IN) :: ndir - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - -#ifndef SHM - pos = 1 -#endif - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - endif - - if (ndir==1) then -#ifdef SHM - pos = decomp%x1disp_o(m) + 1 -#endif - do k=1,n3 - do j=1,n2 - do i=i1,i2 - out(pos) = in(i,j,k) - pos = pos + 1 - enddo - enddo - enddo - else if (ndir==2) then -#ifdef SHM - pos = decomp%y2disp_o(m) + 1 -#endif - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(pos) = in(i,j,k) - pos = pos + 1 - enddo - enddo - enddo - else if (ndir==3) then -#ifdef SHM - pos = decomp%z2disp_o(m) + 1 -#endif - do k=i1,i2 - do j=1,n2 - do i=1,n1 - out(pos) = in(i,j,k) - pos = pos + 1 - enddo - enddo - enddo - else if (ndir==4) then -#ifdef SHM - pos = decomp%y1disp_o(m) + 1 -#endif - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(pos) = in(i,j,k) - pos = pos + 1 - enddo - enddo - enddo - endif - enddo diff --git a/decomp2d/mkl_dfti.f90 b/decomp2d/mkl_dfti.f90 deleted file mode 100644 index 483206c..0000000 --- a/decomp2d/mkl_dfti.f90 +++ /dev/null @@ -1,776 +0,0 @@ -!=============================================================================== -! Copyright 2002-2015 Intel Corporation All Rights Reserved. -! -! The source code, information and material ("Material") contained herein is -! owned by Intel Corporation or its suppliers or licensors, and title to such -! Material remains with Intel Corporation or its suppliers or licensors. The -! Material contains proprietary information of Intel or its suppliers and -! licensors. The Material is protected by worldwide copyright laws and treaty -! provisions. No part of the Material may be used, copied, reproduced, -! modified, published, uploaded, posted, transmitted, distributed or disclosed -! in any way without Intel's prior express written permission. No license under -! any patent, copyright or other intellectual property rights in the Material -! is granted to or conferred upon you, either expressly, by implication, -! inducement, estoppel or otherwise. Any license under such intellectual -! property rights must be express and approved by Intel in writing. -! -! Unless otherwise agreed by Intel in writing, you may not remove or alter this -! notice or any other notice embedded in Materials by Intel or Intel's -! suppliers or licensors in any way. -!=============================================================================== - -! Content: -! Intel(R) Math Kernel Library (MKL) -! Discrete Fourier Transform Interface (DFTI) -!***************************************************************************** - -MODULE MKL_DFT_TYPE - - TYPE, PUBLIC :: DFTI_DESCRIPTOR - PRIVATE - INTEGER :: dontuse - ! Structure of this type is not used in Fortran code - ! the pointer to this type is used only - END TYPE DFTI_DESCRIPTOR - - !====================================================================== - ! These real type kind parameters are not for direct use - !====================================================================== - - INTEGER, PARAMETER :: DFTI_SPKP = SELECTED_REAL_KIND(6,37) - INTEGER, PARAMETER :: DFTI_DPKP = SELECTED_REAL_KIND(15,307) - - !====================================================================== - ! Descriptor configuration parameters [default values in brackets] - !====================================================================== - - ! Domain for forward transform. No default value - INTEGER, PARAMETER :: DFTI_FORWARD_DOMAIN = 0 - - ! Dimensionality, or rank. No default value - INTEGER, PARAMETER :: DFTI_DIMENSION = 1 - - ! Length(s) of transform. No default value - INTEGER, PARAMETER :: DFTI_LENGTHS = 2 - - ! Floating point precision. No default value - INTEGER, PARAMETER :: DFTI_PRECISION = 3 - - ! Scale factor for forward transform [1.0] - INTEGER, PARAMETER :: DFTI_FORWARD_SCALE = 4 - - ! Scale factor for backward transform [1.0] - INTEGER, PARAMETER :: DFTI_BACKWARD_SCALE = 5 - - ! Exponent sign for forward transform [DFTI_NEGATIVE] - ! INTEGER, PARAMETER :: DFTI_FORWARD_SIGN = 6 ! NOT IMPLEMENTED - - ! Number of data sets to be transformed [1] - INTEGER, PARAMETER :: DFTI_NUMBER_OF_TRANSFORMS = 7 - - ! Storage of finite complex-valued sequences in complex domain - ! [DFTI_COMPLEX_COMPLEX] - INTEGER, PARAMETER :: DFTI_COMPLEX_STORAGE = 8 - - ! Storage of finite real-valued sequences in real domain - ! [DFTI_REAL_REAL] - INTEGER, PARAMETER :: DFTI_REAL_STORAGE = 9 - - ! Storage of finite complex-valued sequences in conjugate-even - ! domain [DFTI_COMPLEX_REAL] - INTEGER, PARAMETER :: DFTI_CONJUGATE_EVEN_STORAGE = 10 - - ! Placement of result [DFTI_INPLACE] - INTEGER, PARAMETER :: DFTI_PLACEMENT = 11 - - ! Generalized strides for input data layout - ! [tigth, col-major for Fortran] - INTEGER, PARAMETER :: DFTI_INPUT_STRIDES = 12 - - ! Generalized strides for output data layout - ! [tigth, col-major for Fortran] - INTEGER, PARAMETER :: DFTI_OUTPUT_STRIDES = 13 - - ! Distance between first input elements for multiple transforms [0] - INTEGER, PARAMETER :: DFTI_INPUT_DISTANCE = 14 - - ! Distance between first output elements for multiple transforms [0] - INTEGER, PARAMETER :: DFTI_OUTPUT_DISTANCE = 15 - - ! Effort spent in initialization [DFTI_MEDIUM] - ! INTEGER, PARAMETER :: DFTI_INITIALIZATION_EFFORT = 16 ! NOT IMPLEMENTED - - ! Use of workspace during computation [DFTI_ALLOW] - INTEGER, PARAMETER :: DFTI_WORKSPACE = 17 - - ! Ordering of the result [DFTI_ORDERED] - INTEGER, PARAMETER :: DFTI_ORDERING = 18 - - ! Possible transposition of result [DFTI_NONE] - INTEGER, PARAMETER :: DFTI_TRANSPOSE = 19 - - ! User-settable descriptor name [""] - INTEGER, PARAMETER :: DFTI_DESCRIPTOR_NAME = 20 - - ! Packing format for DFTI_COMPLEX_REAL storage of finite - ! conjugate-even sequences [DFTI_CCS_FORMAT] - INTEGER, PARAMETER :: DFTI_PACKED_FORMAT = 21 - - ! Commit status of the descriptor. Read-only parameter - INTEGER, PARAMETER :: DFTI_COMMIT_STATUS = 22 - - ! Version string for this DFTI implementation. Read-only parameter - INTEGER, PARAMETER :: DFTI_VERSION = 23 - - ! Ordering of the forward transform. Read-only parameter - ! INTEGER, PARAMETER :: DFTI_FORWARD_ORDERING = 24 ! NOT IMPLEMENTED - - ! Ordering of the backward transform. Read-only parameter - ! INTEGER, PARAMETER :: DFTI_BACKWARD_ORDERING = 25 ! NOT IMPLEMENTED - - ! Number of user threads that share the descriptor [1] - INTEGER, PARAMETER :: DFTI_NUMBER_OF_USER_THREADS = 26 - - ! Limit the number of threads used by this descriptor [0 = don't care] - INTEGER, PARAMETER :: DFTI_THREAD_LIMIT = 27 - - !====================================================================== - ! Values of the descriptor configuration parameters - !====================================================================== - - ! DFTI_COMMIT_STATUS - INTEGER, PARAMETER :: DFTI_COMMITTED = 30 - INTEGER, PARAMETER :: DFTI_UNCOMMITTED = 31 - - ! DFTI_FORWARD_DOMAIN - INTEGER, PARAMETER :: DFTI_COMPLEX = 32 - INTEGER, PARAMETER :: DFTI_REAL = 33 - ! INTEGER, PARAMETER :: DFTI_CONJUGATE_EVEN = 34 ! NOT IMPLEMENTED - - ! DFTI_PRECISION - INTEGER, PARAMETER :: DFTI_SINGLE = 35 - INTEGER, PARAMETER :: DFTI_DOUBLE = 36 - - ! DFTI_PRECISION for reduced size of statically linked application. - ! Recommended use: modify statement 'USE MKL_DFTI' in your program, - ! so that it reads as either of: - ! USE MKL_DFTI, FORGET=>DFTI_SINGLE, DFTI_SINGLE=>DFTI_SINGLE_R - ! USE MKL_DFTI, FORGET=>DFTI_DOUBLE, DFTI_DOUBLE=>DFTI_DOUBLE_R - ! where word 'FORGET' can be any name not used in the program. - REAL(DFTI_SPKP), PARAMETER :: DFTI_SINGLE_R = 35 - REAL(DFTI_DPKP), PARAMETER :: DFTI_DOUBLE_R = 36 - - ! DFTI_FORWARD_SIGN - ! INTEGER, PARAMETER :: DFTI_NEGATIVE = 37 ! NOT IMPLEMENTED - ! INTEGER, PARAMETER :: DFTI_POSITIVE = 38 ! NOT IMPLEMENTED - - ! DFTI_COMPLEX_STORAGE and DFTI_CONJUGATE_EVEN_STORAGE - INTEGER, PARAMETER :: DFTI_COMPLEX_COMPLEX = 39 - INTEGER, PARAMETER :: DFTI_COMPLEX_REAL = 40 - - ! DFTI_REAL_STORAGE - INTEGER, PARAMETER :: DFTI_REAL_COMPLEX = 41 - INTEGER, PARAMETER :: DFTI_REAL_REAL = 42 - - ! DFTI_PLACEMENT - INTEGER, PARAMETER :: DFTI_INPLACE = 43 ! Result overwrites input - INTEGER, PARAMETER :: DFTI_NOT_INPLACE = 44 ! Have another place for result - - ! DFTI_INITIALIZATION_EFFORT - ! INTEGER, PARAMETER :: DFTI_LOW = 45 ! NOT IMPLEMENTED - ! INTEGER, PARAMETER :: DFTI_MEDIUM = 46 ! NOT IMPLEMENTED - ! INTEGER, PARAMETER :: DFTI_HIGH = 47 ! NOT IMPLEMENTED - - ! DFTI_ORDERING - INTEGER, PARAMETER :: DFTI_ORDERED = 48 - INTEGER, PARAMETER :: DFTI_BACKWARD_SCRAMBLED = 49 - ! INTEGER, PARAMETER :: DFTI_FORWARD_SCRAMBLED = 50 ! NOT IMPLEMENTED - - ! Allow/avoid certain usages - INTEGER, PARAMETER :: DFTI_ALLOW = 51 ! Allow transposition or workspace - INTEGER, PARAMETER :: DFTI_AVOID = 52 ! Avoid auxiliary storage - INTEGER, PARAMETER :: DFTI_NONE = 53 - - ! DFTI_PACKED_FORMAT - ! (for storing congugate-even finite sequence in real array) - INTEGER, PARAMETER :: DFTI_CCS_FORMAT = 54 ! Complex conjugate-symmetric - INTEGER, PARAMETER :: DFTI_PACK_FORMAT = 55 ! Pack format for real DFT - INTEGER, PARAMETER :: DFTI_PERM_FORMAT = 56 ! Perm format for real DFT - INTEGER, PARAMETER :: DFTI_CCE_FORMAT = 57 ! Complex conjugate-even - - !====================================================================== - ! Error classes - !====================================================================== - INTEGER, PARAMETER :: DFTI_NO_ERROR = 0 - INTEGER, PARAMETER :: DFTI_MEMORY_ERROR = 1 - INTEGER, PARAMETER :: DFTI_INVALID_CONFIGURATION = 2 - INTEGER, PARAMETER :: DFTI_INCONSISTENT_CONFIGURATION = 3 - INTEGER, PARAMETER :: DFTI_MULTITHREADED_ERROR = 4 - INTEGER, PARAMETER :: DFTI_BAD_DESCRIPTOR = 5 - INTEGER, PARAMETER :: DFTI_UNIMPLEMENTED = 6 - INTEGER, PARAMETER :: DFTI_MKL_INTERNAL_ERROR = 7 - INTEGER, PARAMETER :: DFTI_NUMBER_OF_THREADS_ERROR = 8 - INTEGER, PARAMETER :: DFTI_1D_LENGTH_EXCEEDS_INT32 = 9 - - ! Maximum length of error string - INTEGER, PARAMETER :: DFTI_MAX_MESSAGE_LENGTH = 80 - - ! Maximum length of user-settable descriptor name - INTEGER, PARAMETER :: DFTI_MAX_NAME_LENGTH = 10 - - ! Maximum length of MKL version string - INTEGER, PARAMETER :: DFTI_VERSION_LENGTH = 198 - -END MODULE MKL_DFT_TYPE - -MODULE MKL_DFTI - - USE MKL_DFT_TYPE - - INTERFACE DftiCreateDescriptor - - ! overloading of DftiCreateDescriptor for 1D DFT - FUNCTION dfti_create_descriptor_1d(desc, precision, domain, dim, length) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_create_descriptor_1d - !DEC$ ATTRIBUTES REFERENCE :: dfti_create_descriptor_1d - INTEGER dfti_create_descriptor_1d - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - INTEGER, INTENT(IN) :: precision - INTEGER, INTENT(IN) :: domain - INTEGER, INTENT(IN) :: dim, length - END FUNCTION dfti_create_descriptor_1d - - ! overloading of DftiCreateDescriptor for nD DFT - FUNCTION dfti_create_descriptor_highd(desc, precision, domain, dim,length) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_create_descriptor_highd - !DEC$ ATTRIBUTES REFERENCE :: dfti_create_descriptor_highd - INTEGER dfti_create_descriptor_highd - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - INTEGER, INTENT(IN) :: precision - INTEGER, INTENT(IN) :: domain - INTEGER, INTENT(IN) :: dim - INTEGER, INTENT(IN), DIMENSION(*) :: length - END FUNCTION dfti_create_descriptor_highd - - ! overloading of DftiCreateDescriptor for SP 1D DFT - ! second parameter (precision) should be any REAL*4 value - ! for dispatching during compile time - FUNCTION dfti_create_descriptor_s_1d(desc, s, dom, one, dim) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_create_descriptor_s_1d - !DEC$ ATTRIBUTES REFERENCE :: dfti_create_descriptor_s_1d - INTEGER dfti_create_descriptor_s_1d - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), INTENT(IN) :: s - INTEGER, INTENT(IN) :: dom - INTEGER, INTENT(IN) :: one - INTEGER, INTENT(IN) :: dim - END FUNCTION dfti_create_descriptor_s_1d - - ! overloading of DftiCreateDescriptor for SP nD DFT - ! second parameter (precision) should be any REAL*4 value - ! for dispatching during compile time - FUNCTION dfti_create_descriptor_s_md(desc, s, dom, many, dims) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_create_descriptor_s_md - !DEC$ ATTRIBUTES REFERENCE :: dfti_create_descriptor_s_md - INTEGER dfti_create_descriptor_s_md - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), INTENT(IN) :: s - INTEGER, INTENT(IN) :: dom - INTEGER, INTENT(IN) :: many - INTEGER, INTENT(IN), DIMENSION(*) :: dims - END FUNCTION dfti_create_descriptor_s_md - - ! overloading of DftiCreateDescriptor for DP 1D DFT - ! second parameter (precision) should be any REAL*8 value - ! for dispatching during compile time - FUNCTION dfti_create_descriptor_d_1d(desc, d, dom, one, dim) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_create_descriptor_d_1d - !DEC$ ATTRIBUTES REFERENCE :: dfti_create_descriptor_d_1d - INTEGER dfti_create_descriptor_d_1d - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), INTENT(IN) :: d - INTEGER, INTENT(IN) :: dom - INTEGER, INTENT(IN) :: one - INTEGER, INTENT(IN) :: dim - END FUNCTION dfti_create_descriptor_d_1d - - ! overloading of DftiCreateDescriptor for DP nD DFT - ! second parameter (precision) should be any REAL*8 value - ! for dispatching during compile time - FUNCTION dfti_create_descriptor_d_md(desc, d, dom, many, dims) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_create_descriptor_d_md - !DEC$ ATTRIBUTES REFERENCE :: dfti_create_descriptor_d_md - INTEGER dfti_create_descriptor_d_md - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), INTENT(IN) :: d - INTEGER, INTENT(IN) :: dom - INTEGER, INTENT(IN) :: many - INTEGER, INTENT(IN), DIMENSION(*) :: dims - END FUNCTION dfti_create_descriptor_d_md - - END INTERFACE - - INTERFACE DftiCopyDescriptor - - FUNCTION dfti_copy_descriptor_external(desc, new_desc) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_copy_descriptor_external - !DEC$ ATTRIBUTES REFERENCE :: dfti_copy_descriptor_external - INTEGER dfti_copy_descriptor_external - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - TYPE(DFTI_DESCRIPTOR), POINTER :: new_desc - END FUNCTION dfti_copy_descriptor_external - - END INTERFACE - - INTERFACE DftiCommitDescriptor - - FUNCTION dfti_commit_descriptor_external(desc) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_commit_descriptor_external - !DEC$ ATTRIBUTES REFERENCE :: dfti_commit_descriptor_external - INTEGER dfti_commit_descriptor_external - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_commit_descriptor_external - - END INTERFACE - - INTERFACE DftiSetValue - - ! overloading of DftiSetValue for integer value - FUNCTION dfti_set_value_intval(desc, OptName, IntVal) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_set_value_intval - !DEC$ ATTRIBUTES REFERENCE :: dfti_set_value_intval - INTEGER dfti_set_value_intval - INTEGER, INTENT(IN) :: OptName - INTEGER, INTENT(IN) :: IntVal - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_set_value_intval - - ! overloading of DftiSetValue for SP value - FUNCTION dfti_set_value_sglval(desc, OptName, sglval) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_set_value_sglval - !DEC$ ATTRIBUTES REFERENCE :: dfti_set_value_sglval - INTEGER dfti_set_value_sglval - INTEGER, INTENT(IN) :: OptName - REAL(DFTI_SPKP), INTENT(IN) :: sglval - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_set_value_sglval - - ! overloading of DftiSetValue for DP value - FUNCTION dfti_set_value_dblval(desc, OptName, DblVal) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_set_value_dblval - !DEC$ ATTRIBUTES REFERENCE :: dfti_set_value_dblval - INTEGER dfti_set_value_dblval - INTEGER, INTENT(IN) :: OptName - REAL(DFTI_DPKP), INTENT(IN) :: DblVal - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_set_value_dblval - - ! overloading of DftiSetValue for integer vector - FUNCTION dfti_set_value_intvec(desc, OptName, IntVec) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_set_value_intvec - !DEC$ ATTRIBUTES REFERENCE :: dfti_set_value_intvec - INTEGER dfti_set_value_intvec - INTEGER, INTENT(IN) :: OptName - INTEGER, INTENT(IN), DIMENSION(*) :: IntVec - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_set_value_intvec - - ! overloading of DftiSetValue for char vector - FUNCTION dfti_set_value_chars(desc, OptName, Chars) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_set_value_chars - !DEC$ ATTRIBUTES REFERENCE :: dfti_set_value_chars - INTEGER dfti_set_value_chars - INTEGER, INTENT(IN) :: OptName - CHARACTER(*), INTENT(IN) :: Chars - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_set_value_chars - - END INTERFACE - - INTERFACE DftiGetValue - - ! overloading of DftiGetValue for integer value - FUNCTION dfti_get_value_intval(desc, OptName, IntVal) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_get_value_intval - !DEC$ ATTRIBUTES REFERENCE :: dfti_get_value_intval - INTEGER dfti_get_value_intval - INTEGER, INTENT(IN) :: OptName - INTEGER, INTENT(OUT) :: IntVal - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_get_value_intval - - ! overloading of DftiGetValue for SP value - FUNCTION dfti_get_value_sglval(desc, OptName, sglval) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_get_value_sglval - !DEC$ ATTRIBUTES REFERENCE :: dfti_get_value_sglval - INTEGER dfti_get_value_sglval - INTEGER, INTENT(IN) :: OptName - REAL(DFTI_SPKP), INTENT(OUT) :: sglval - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_get_value_sglval - - ! overloading of DftiGetValue for DP value - FUNCTION dfti_get_value_dblval(desc, OptName, DblVal) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_get_value_dblval - !DEC$ ATTRIBUTES REFERENCE :: dfti_get_value_dblval - INTEGER dfti_get_value_dblval - INTEGER, INTENT(IN) :: OptName - REAL(DFTI_DPKP), INTENT(OUT) :: DblVal - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_get_value_dblval - - ! overloading of DftiGetValue for integer vector - FUNCTION dfti_get_value_intvec(desc, OptName, IntVec) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_get_value_intvec - !DEC$ ATTRIBUTES REFERENCE :: dfti_get_value_intvec - INTEGER dfti_get_value_intvec - INTEGER, INTENT(IN) :: OptName - INTEGER, INTENT(OUT), DIMENSION(*) :: IntVec - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_get_value_intvec - - ! overloading of DftiGetValue for char vector - FUNCTION dfti_get_value_chars(desc, OptName, Chars) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_get_value_chars - !DEC$ ATTRIBUTES REFERENCE :: dfti_get_value_chars - INTEGER dfti_get_value_chars - INTEGER, INTENT(IN) :: OptName - CHARACTER(*), INTENT(OUT) :: Chars - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_get_value_chars - - END INTERFACE - - INTERFACE DftiComputeForward - - ! overloading of DftiComputeForward for SP R2C DFT (inplace) - FUNCTION dfti_compute_forward_s(desc,sSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_s - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_s - INTEGER dfti_compute_forward_s - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), INTENT(INOUT), DIMENSION(*) :: sSrcDst - END FUNCTION dfti_compute_forward_s - - ! overloading of DftiComputeForward for SP C2C DFT (inplace) - FUNCTION dfti_compute_forward_c(desc,cSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_c - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_c - INTEGER dfti_compute_forward_c - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_SPKP), INTENT(INOUT), DIMENSION(*) :: cSrcDst - END FUNCTION dfti_compute_forward_c - - ! overloading of DftiComputeForward for SP C2C DFT (inplace, split complex) - FUNCTION dfti_compute_forward_ss(desc,sSrcDstRe,sSrcDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_ss - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_ss - INTEGER dfti_compute_forward_ss - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), DIMENSION(*) :: sSrcDstRe - REAL(DFTI_SPKP), DIMENSION(*) :: sSrcDstIm - END FUNCTION dfti_compute_forward_ss - - ! overloading of DftiComputeForward for SP R2C DFT (out-of-place) - FUNCTION dfti_compute_forward_sc(desc,sSrc,cDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_sc - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_sc - INTEGER dfti_compute_forward_sc - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: sSrc - COMPLEX(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: cDst - END FUNCTION dfti_compute_forward_sc - - ! overloading of DftiComputeForward for SP C2C DFT (out-of-place) - FUNCTION dfti_compute_forward_cc(desc,cSrc,cDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_cc - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_cc - INTEGER dfti_compute_forward_cc - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: cSrc - COMPLEX(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: cDst - END FUNCTION dfti_compute_forward_cc - - ! overloading of DftiComputeForward for SP C2C DFT (out-of-place, split - ! complex) - FUNCTION dfti_compute_forward_ssss(desc,sSrcRe,sSrcIm,sDstRe,sDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_ssss - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_ssss - INTEGER dfti_compute_forward_ssss - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: sSrcRe - REAL(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: sSrcIm - REAL(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: sDstRe - REAL(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: sDstIm - END FUNCTION dfti_compute_forward_ssss - - ! overloading of DftiComputeForward for DP R2C DFT (inplace) - FUNCTION dfti_compute_forward_d(desc,dSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_d - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_d - INTEGER dfti_compute_forward_d - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), INTENT(INOUT), DIMENSION(*) :: dSrcDst - END FUNCTION dfti_compute_forward_d - - ! overloading of DftiComputeForward for DP C2C DFT (inplace) - FUNCTION dfti_compute_forward_z(desc,zSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_z - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_z - INTEGER dfti_compute_forward_z - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_DPKP), INTENT(INOUT), DIMENSION(*) :: zSrcDst - END FUNCTION dfti_compute_forward_z - - ! overloading of DftiComputeForward for DP C2C DFT (inplace, split complex) - FUNCTION dfti_compute_forward_dd(desc,dSrcDstRe,dSrcDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_dd - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_dd - INTEGER dfti_compute_forward_dd - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), DIMENSION(*) :: dSrcDstRe - REAL(DFTI_DPKP), DIMENSION(*) :: dSrcDstIm - END FUNCTION dfti_compute_forward_dd - - ! overloading of DftiComputeForward for DP R2C DFT (out-of-place) - FUNCTION dfti_compute_forward_dz(desc,dSrc,zDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_dz - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_dz - INTEGER dfti_compute_forward_dz - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: dSrc - COMPLEX(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: zDst - END FUNCTION dfti_compute_forward_dz - - ! overloading of DftiComputeForward for DP C2C DFT (out-of-place) - FUNCTION dfti_compute_forward_zz(desc,zSrc,zDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_zz - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_zz - INTEGER dfti_compute_forward_zz - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: zSrc - COMPLEX(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: zDst - END FUNCTION dfti_compute_forward_zz - - ! overloading of DftiComputeForward for DP C2C DFT (out-of-place, split - ! complex) - FUNCTION dfti_compute_forward_dddd(desc,dSrcRe,dSrcIm,dDstRe,dDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_forward_dddd - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_forward_dddd - INTEGER dfti_compute_forward_dddd - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: dSrcRe - REAL(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: dSrcIm - REAL(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: dDstRe - REAL(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: dDstIm - END FUNCTION dfti_compute_forward_dddd - - END INTERFACE DftiComputeForward - - INTERFACE DftiComputeBackward - - - ! overloading of DftiComputeBackward for SP C2R DFT (inplace) - FUNCTION dfti_compute_backward_s(desc,sSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_s - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_s - INTEGER dfti_compute_backward_s - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), INTENT(INOUT), DIMENSION(*) :: sSrcDst - END FUNCTION dfti_compute_backward_s - - ! overloading of DftiComputeBackward for SP C2C DFT (inplace) - FUNCTION dfti_compute_backward_c(desc,cSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_c - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_c - INTEGER dfti_compute_backward_c - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_SPKP), INTENT(INOUT), DIMENSION(*) :: cSrcDst - END FUNCTION dfti_compute_backward_c - - ! overloading of DftiComputeBackward for SP C2C DFT (inplace, split complex) - FUNCTION dfti_compute_backward_ss(desc,sSrcDstRe,sSrcDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_ss - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_ss - INTEGER dfti_compute_backward_ss - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), DIMENSION(*) :: sSrcDstRe - REAL(DFTI_SPKP), DIMENSION(*) :: sSrcDstIm - END FUNCTION dfti_compute_backward_ss - - ! overloading of DftiComputeBackward for SP C2R DFT (out-of-place) - FUNCTION dfti_compute_backward_cs(desc,cSrc,sDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_cs - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_cs - INTEGER dfti_compute_backward_cs - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: cSrc - REAL(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: sDst - END FUNCTION dfti_compute_backward_cs - - ! overloading of DftiComputeBackward for SP C2C DFT (out-of-place) - FUNCTION dfti_compute_backward_cc(desc,cSrc,cDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_cc - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_cc - INTEGER dfti_compute_backward_cc - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: cSrc - COMPLEX(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: cDst - END FUNCTION dfti_compute_backward_cc - - ! overloading of DftiComputeBackward for SP C2C DFT (out-of-place, split - ! complex) - FUNCTION dfti_compute_backward_ssss(desc,sSrcRe,sSrcIm,sDstRe,sDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_ssss - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_ssss - INTEGER dfti_compute_backward_ssss - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: sSrcRe - REAL(DFTI_SPKP), INTENT(IN), DIMENSION(*) :: sSrcIm - REAL(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: sDstRe - REAL(DFTI_SPKP), INTENT(OUT), DIMENSION(*) :: sDstIm - END FUNCTION dfti_compute_backward_ssss - - ! overloading of DftiComputeBackward for DP C2R DFT (inplace) - FUNCTION dfti_compute_backward_d(desc,dSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_d - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_d - INTEGER dfti_compute_backward_d - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), INTENT(INOUT), DIMENSION(*) :: dSrcDst - END FUNCTION dfti_compute_backward_d - - ! overloading of DftiComputeBackward for DP C2C DFT (inplace) - FUNCTION dfti_compute_backward_z(desc,zSrcDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_z - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_z - INTEGER dfti_compute_backward_z - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_DPKP), INTENT(INOUT), DIMENSION(*) :: zSrcDst - END FUNCTION dfti_compute_backward_z - - ! overloading of DftiComputeBackward for DP C2C DFT (inplace, split complex) - FUNCTION dfti_compute_backward_dd(desc,dSrcDstRe,dSrcDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_dd - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_dd - INTEGER dfti_compute_backward_dd - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), DIMENSION(*) :: dSrcDstRe - REAL(DFTI_DPKP), DIMENSION(*) :: dSrcDstIm - END FUNCTION dfti_compute_backward_dd - - ! overloading of DftiComputeBackward for DP C2R DFT (out-of-place) - FUNCTION dfti_compute_backward_zd(desc,zSrc,dDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_zd - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_zd - INTEGER dfti_compute_backward_zd - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: zSrc - REAL(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: dDst - END FUNCTION dfti_compute_backward_zd - - ! overloading of DftiComputeBackward for DP C2C DFT (out-of-place) - FUNCTION dfti_compute_backward_zz(desc,zSrc,zDst) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_zz - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_zz - INTEGER dfti_compute_backward_zz - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - COMPLEX(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: zSrc - COMPLEX(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: zDst - END FUNCTION dfti_compute_backward_zz - - ! overloading of DftiComputeBackward for DP C2C DFT (out-of-place, split - ! complex) - FUNCTION dfti_compute_backward_dddd(desc,dSrcRe,dSrcIm,dDstRe,dDstIm) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_compute_backward_dddd - !DEC$ ATTRIBUTES REFERENCE :: dfti_compute_backward_dddd - INTEGER dfti_compute_backward_dddd - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - REAL(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: dSrcRe - REAL(DFTI_DPKP), INTENT(IN), DIMENSION(*) :: dSrcIm - REAL(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: dDstRe - REAL(DFTI_DPKP), INTENT(OUT), DIMENSION(*) :: dDstIm - END FUNCTION dfti_compute_backward_dddd - - END INTERFACE DftiComputeBackward - - INTERFACE DftiFreeDescriptor - - FUNCTION dfti_free_descriptor_external(desc) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_free_descriptor_external - !DEC$ ATTRIBUTES REFERENCE :: dfti_free_descriptor_external - INTEGER dfti_free_descriptor_external - TYPE(DFTI_DESCRIPTOR), POINTER :: desc - END FUNCTION dfti_free_descriptor_external - - END INTERFACE - - INTERFACE DftiErrorClass - - FUNCTION dfti_error_class_external(Status, ErrorClass) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_error_class_external - !DEC$ ATTRIBUTES REFERENCE :: dfti_error_class_external - LOGICAL dfti_error_class_external - INTEGER, INTENT(IN) :: Status - INTEGER, INTENT(IN) :: ErrorClass - END FUNCTION dfti_error_class_external - - END INTERFACE - - INTERFACE DftiErrorMessage - - FUNCTION dfti_error_message_external(Status) - USE MKL_DFT_TYPE - !DEC$ ATTRIBUTES C :: dfti_error_message_external - !DEC$ ATTRIBUTES REFERENCE :: dfti_error_message_external - CHARACTER(LEN=DFTI_MAX_MESSAGE_LENGTH) :: dfti_error_message_external - INTEGER, INTENT(IN) :: Status - END FUNCTION dfti_error_message_external - - END INTERFACE - -END MODULE MKL_DFTI diff --git a/decomp2d/transpose_x_to_y.inc b/decomp2d/transpose_x_to_y.inc deleted file mode 100644 index ddb7f35..0000000 --- a/decomp2d/transpose_x_to_y.inc +++ /dev/null @@ -1,514 +0,0 @@ -! -*- mode: f90 -*- -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This file contains the routines that transpose data from X to Y pencil - -subroutine transpose_x_to_y_real(src, dst, opt_decomp) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: src - real(mytype), dimension(:,:,:), intent(OUT) :: dst - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM - real(mytype) :: work1(*), work2(*) - POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - - integer :: s1,s2,s3,d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - ! rearrange source array as send buffer -#ifdef SHM - work1_p = decomp%COL_INFO%SND_P - call mem_split_xy_real(src, s1, s2, s3, work1, dims(1), & - decomp%x1dist, decomp) -#else - call mem_split_xy_real(src, s1, s2, s3, work1_r, dims(1), & - decomp%x1dist, decomp) -#endif - - ! define receive buffer -#ifdef SHM - work2_p = decomp%COL_INFO%RCV_P - call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) -#endif - - ! transpose using MPI_ALLTOALL(V) -#ifdef SHM - if (decomp%COL_INFO%CORE_ME==1) THEN - call MPI_ALLTOALLV(work1, decomp%x1cnts_s, decomp%x1disp_s, & - real_type, work2, decomp%y1cnts_s, decomp%y1disp_s, & - real_type, decomp%COL_INFO%SMP_COMM, ierror) - end if -#else -#ifdef EVEN - call MPI_ALLTOALL(work1_r, decomp%x1count, & - real_type, work2_r, decomp%y1count, & - real_type, DECOMP_2D_COMM_COL, ierror) -#else - call MPI_ALLTOALLV(work1_r, decomp%x1cnts, decomp%x1disp, & - real_type, work2_r, decomp%y1cnts, decomp%y1disp, & - real_type, DECOMP_2D_COMM_COL, ierror) -#endif -#endif - - ! rearrange receive buffer -#ifdef SHM - call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) - call mem_merge_xy_real(work2, d1, d2, d3, dst, dims(1), & - decomp%y1dist, decomp) -#else - call mem_merge_xy_real(work2_r, d1, d2, d3, dst, dims(1), & - decomp%y1dist, decomp) -#endif - - return -end subroutine transpose_x_to_y_real - - -#ifdef OCC -subroutine transpose_x_to_y_real_start(handle, src, dst, sbuf, rbuf, & - opt_decomp) - - implicit none - - integer :: handle - real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: s1,s2,s3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - - ! rearrange source array as send buffer - call mem_split_xy_real(src, s1, s2, s3, sbuf, dims(1), & - decomp%x1dist, decomp) - -#ifdef EVEN - call NBC_IALLTOALL(sbuf, decomp%x1count, real_type, & - rbuf, decomp%y1count, real_type, & - DECOMP_2D_COMM_COL, handle, ierror) -#else - call NBC_IALLTOALLV(sbuf, decomp%x1cnts, decomp%x1disp, real_type, & - rbuf, decomp%y1cnts, decomp%y1disp, real_type, & - DECOMP_2D_COMM_COL, handle, ierror) -#endif - - return -end subroutine transpose_x_to_y_real_start - - -subroutine transpose_x_to_y_real_wait(handle, src, dst, sbuf, rbuf, & - opt_decomp) - - implicit none - - integer :: handle - real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - call NBC_WAIT(handle, ierror) - - ! rearrange receive buffer - call mem_merge_xy_real(rbuf, d1, d2, d3, dst, dims(1), & - decomp%y1dist, decomp) - - return -end subroutine transpose_x_to_y_real_wait -#endif - - -subroutine transpose_x_to_y_complex(src, dst, opt_decomp) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: src - complex(mytype), dimension(:,:,:), intent(OUT) :: dst - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM - complex(mytype) :: work1(*), work2(*) - POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - - integer :: s1,s2,s3,d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - ! rearrange source array as send buffer -#ifdef SHM - work1_p = decomp%COL_INFO%SND_P_c - call mem_split_xy_complex(src, s1, s2, s3, work1, dims(1), & - decomp%x1dist, decomp) -#else - call mem_split_xy_complex(src, s1, s2, s3, work1_c, dims(1), & - decomp%x1dist, decomp) -#endif - - ! define receive buffer -#ifdef SHM - work2_p = decomp%COL_INFO%RCV_P_c - call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) -#endif - - ! transpose using MPI_ALLTOALL(V) -#ifdef SHM - if (decomp%COL_INFO%CORE_ME==1) THEN - call MPI_ALLTOALLV(work1, decomp%x1cnts_s, decomp%x1disp_s, & - complex_type, work2, decomp%y1cnts_s, decomp%y1disp_s, & - complex_type, decomp%COL_INFO%SMP_COMM, ierror) - end if -#else -#ifdef EVEN - call MPI_ALLTOALL(work1_c, decomp%x1count, & - complex_type, work2_c, decomp%y1count, & - complex_type, DECOMP_2D_COMM_COL, ierror) -#else - call MPI_ALLTOALLV(work1_c, decomp%x1cnts, decomp%x1disp, & - complex_type, work2_c, decomp%y1cnts, decomp%y1disp, & - complex_type, DECOMP_2D_COMM_COL, ierror) -#endif -#endif - - ! rearrange receive buffer -#ifdef SHM - call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) - call mem_merge_xy_complex(work2, d1, d2, d3, dst, dims(1), & - decomp%y1dist, decomp) -#else - call mem_merge_xy_complex(work2_c, d1, d2, d3, dst, dims(1), & - decomp%y1dist, decomp) -#endif - - return -end subroutine transpose_x_to_y_complex - - -#ifdef OCC -subroutine transpose_x_to_y_complex_start(handle, src, dst, sbuf, & - rbuf, opt_decomp) - - implicit none - - integer :: handle - complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: s1,s2,s3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - - ! rearrange source array as send buffer - call mem_split_xy_complex(src, s1, s2, s3, sbuf, dims(1), & - decomp%x1dist, decomp) - -#ifdef EVEN - call NBC_IALLTOALL(sbuf, decomp%x1count, & - complex_type, rbuf, decomp%y1count, & - complex_type, DECOMP_2D_COMM_COL, handle, ierror) -#else - call NBC_IALLTOALLV(sbuf, decomp%x1cnts, decomp%x1disp, & - complex_type, rbuf, decomp%y1cnts, decomp%y1disp, & - complex_type, DECOMP_2D_COMM_COL, handle, ierror) -#endif - - return -end subroutine transpose_x_to_y_complex_start - - -subroutine transpose_x_to_y_complex_wait(handle, src, dst, sbuf, & - rbuf, opt_decomp) - - implicit none - - integer :: handle - complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - call NBC_WAIT(handle, ierror) - - ! rearrange receive buffer - call mem_merge_xy_complex(rbuf, d1, d2, d3, dst, dims(1), & - decomp%y1dist, decomp) - - return -end subroutine transpose_x_to_y_complex_wait -#endif - - -! pack/unpack ALLTOALL(V) buffers - -subroutine mem_split_xy_real(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - real(mytype), dimension(n1,n2,n3), intent(IN) :: in - real(mytype), dimension(*), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%x1disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%x1count + 1 -#else - pos = decomp%x1disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=1,n2 - do i=i1,i2 - out(pos) = in(i,j,k) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_split_xy_real - - -subroutine mem_split_xy_complex(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - complex(mytype), dimension(n1,n2,n3), intent(IN) :: in - complex(mytype), dimension(*), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%x1disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%x1count + 1 -#else - pos = decomp%x1disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=1,n2 - do i=i1,i2 - out(pos) = in(i,j,k) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_split_xy_complex - - -subroutine mem_merge_xy_real(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - real(mytype), dimension(*), intent(IN) :: in - real(mytype), dimension(n1,n2,n3), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2, pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%y1disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%y1count + 1 -#else - pos = decomp%y1disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_merge_xy_real - - -subroutine mem_merge_xy_complex(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - complex(mytype), dimension(*), intent(IN) :: in - complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2, pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%y1disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%y1count + 1 -#else - pos = decomp%y1disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_merge_xy_complex diff --git a/decomp2d/transpose_y_to_x.inc b/decomp2d/transpose_y_to_x.inc deleted file mode 100644 index ec82699..0000000 --- a/decomp2d/transpose_y_to_x.inc +++ /dev/null @@ -1,513 +0,0 @@ -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This file contains the routines that transpose data from Y to X pencil - -subroutine transpose_y_to_x_real(src, dst, opt_decomp) - -implicit none - -real(mytype), dimension(:,:,:), intent(IN) :: src -real(mytype), dimension(:,:,:), intent(OUT) :: dst -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - -TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM -real(mytype) :: work1(*), work2(*) -POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - -integer :: s1,s2,s3,d1,d2,d3 -integer :: ierror - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -s1 = SIZE(src,1) -s2 = SIZE(src,2) -s3 = SIZE(src,3) -d1 = SIZE(dst,1) -d2 = SIZE(dst,2) -d3 = SIZE(dst,3) - -! rearrange source array as send buffer -#ifdef SHM -work1_p = decomp%COL_INFO%SND_P -call mem_split_yx_real(src, s1, s2, s3, work1, dims(1), & -decomp%y1dist, decomp) -#else -call mem_split_yx_real(src, s1, s2, s3, work1_r, dims(1), & -decomp%y1dist, decomp) -#endif - -! define receive buffer -#ifdef SHM -work2_p = decomp%COL_INFO%RCV_P -call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) -#endif - -! transpose using MPI_ALLTOALL(V) -#ifdef SHM -if (decomp%COL_INFO%CORE_ME==1) THEN -call MPI_ALLTOALLV(work1, decomp%y1cnts_s, decomp%y1disp_s, & -real_type, work2, decomp%x1cnts_s, decomp%x1disp_s, & -real_type, decomp%COL_INFO%SMP_COMM, ierror) -end if -#else -#ifdef EVEN -call MPI_ALLTOALL(work1_r, decomp%y1count, & -real_type, work2_r, decomp%x1count, & -real_type, DECOMP_2D_COMM_COL, ierror) -#else -call MPI_ALLTOALLV(work1_r, decomp%y1cnts, decomp%y1disp, & -real_type, work2_r, decomp%x1cnts, decomp%x1disp, & -real_type, DECOMP_2D_COMM_COL, ierror) -#endif -#endif - -! rearrange receive buffer -#ifdef SHM -call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) -call mem_merge_yx_real(work2, d1, d2, d3, dst, dims(1), & -decomp%x1dist, decomp) -#else -call mem_merge_yx_real(work2_r, d1, d2, d3, dst, dims(1), & -decomp%x1dist, decomp) -#endif - -return -end subroutine transpose_y_to_x_real - - -#ifdef OCC -subroutine transpose_y_to_x_real_start(handle, src, dst, sbuf, rbuf, & -opt_decomp) - -implicit none - -integer :: handle -real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - -TYPE(DECOMP_INFO) :: decomp - -integer :: s1,s2,s3 -integer :: ierror - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -s1 = SIZE(src,1) -s2 = SIZE(src,2) -s3 = SIZE(src,3) - -! rearrange source array as send buffer -call mem_split_yx_real(src, s1, s2, s3, sbuf, dims(1), & -decomp%y1dist, decomp) - -#ifdef EVEN -call NBC_IALLTOALL(sbuf, decomp%y1count, real_type, & -rbuf, decomp%x1count, real_type, & -DECOMP_2D_COMM_COL, handle, ierror) -#else -call NBC_IALLTOALLV(sbuf, decomp%y1cnts, decomp%y1disp, real_type, & -rbuf, decomp%x1cnts, decomp%x1disp, real_type, & -DECOMP_2D_COMM_COL, handle, ierror) -#endif - -return -end subroutine transpose_y_to_x_real_start - - -subroutine transpose_y_to_x_real_wait(handle, src, dst, sbuf, rbuf, & -opt_decomp) - -implicit none - -integer :: handle -real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - -TYPE(DECOMP_INFO) :: decomp - -integer :: d1,d2,d3 -integer :: ierror - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -d1 = SIZE(dst,1) -d2 = SIZE(dst,2) -d3 = SIZE(dst,3) - -call NBC_WAIT(handle, ierror) - -! rearrange receive buffer -call mem_merge_yx_real(rbuf, d1, d2, d3, dst, dims(1), & -decomp%x1dist, decomp) - -return -end subroutine transpose_y_to_x_real_wait -#endif - - -subroutine transpose_y_to_x_complex(src, dst, opt_decomp) - -implicit none - -complex(mytype), dimension(:,:,:), intent(IN) :: src -complex(mytype), dimension(:,:,:), intent(OUT) :: dst -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - -TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM -complex(mytype) :: work1(*), work2(*) -POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - -integer :: s1,s2,s3,d1,d2,d3 -integer :: ierror - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -s1 = SIZE(src,1) -s2 = SIZE(src,2) -s3 = SIZE(src,3) -d1 = SIZE(dst,1) -d2 = SIZE(dst,2) -d3 = SIZE(dst,3) - -! rearrange source array as send buffer -#ifdef SHM -work1_p = decomp%COL_INFO%SND_P_c -call mem_split_yx_complex(src, s1, s2, s3, work1, dims(1), & -decomp%y1dist, decomp) -#else -call mem_split_yx_complex(src, s1, s2, s3, work1_c, dims(1), & -decomp%y1dist, decomp) -#endif - -! define receive buffer -#ifdef SHM -work2_p = decomp%COL_INFO%RCV_P_c -call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) -#endif - -! transpose using MPI_ALLTOALL(V) -#ifdef SHM -if (decomp%COL_INFO%CORE_ME==1) THEN -call MPI_ALLTOALLV(work1, decomp%y1cnts_s, decomp%y1disp_s, & -complex_type, work2, decomp%x1cnts_s, decomp%x1disp_s, & -complex_type, decomp%COL_INFO%SMP_COMM, ierror) -end if -#else -#ifdef EVEN -call MPI_ALLTOALL(work1_c, decomp%y1count, & -complex_type, work2_c, decomp%x1count, & -complex_type, DECOMP_2D_COMM_COL, ierror) -#else -call MPI_ALLTOALLV(work1_c, decomp%y1cnts, decomp%y1disp, & -complex_type, work2_c, decomp%x1cnts, decomp%x1disp, & -complex_type, DECOMP_2D_COMM_COL, ierror) -#endif -#endif - -! rearrange receive buffer -#ifdef SHM -call MPI_BARRIER(decomp%COL_INFO%CORE_COMM, ierror) -call mem_merge_yx_complex(work2, d1, d2, d3, dst, dims(1), & -decomp%x1dist, decomp) -#else -call mem_merge_yx_complex(work2_c, d1, d2, d3, dst, dims(1), & -decomp%x1dist, decomp) -#endif - -return -end subroutine transpose_y_to_x_complex - - -#ifdef OCC -subroutine transpose_y_to_x_complex_start(handle, src, dst, sbuf, & -rbuf, opt_decomp) - -implicit none - -integer :: handle -complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - -TYPE(DECOMP_INFO) :: decomp - -integer :: s1,s2,s3 -integer :: ierror - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -s1 = SIZE(src,1) -s2 = SIZE(src,2) -s3 = SIZE(src,3) - -! rearrange source array as send buffer -call mem_split_yx_complex(src, s1, s2, s3, sbuf, dims(1), & -decomp%y1dist, decomp) - -#ifdef EVEN -call NBC_IALLTOALL(sbuf, decomp%y1count, & -complex_type, rbuf, decomp%x1count, & -complex_type, DECOMP_2D_COMM_COL, handle, ierror) -#else -call NBC_IALLTOALLV(sbuf, decomp%y1cnts, decomp%y1disp, & -complex_type, rbuf, decomp%x1cnts, decomp%x1disp, & -complex_type, DECOMP_2D_COMM_COL, handle, ierror) -#endif - -return -end subroutine transpose_y_to_x_complex_start - - -subroutine transpose_y_to_x_complex_wait(handle, src, dst, sbuf, & -rbuf, opt_decomp) - -implicit none - -integer :: handle -complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf -TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - -TYPE(DECOMP_INFO) :: decomp - -integer :: d1,d2,d3 -integer :: ierror - -if (present(opt_decomp)) then -decomp = opt_decomp -else -decomp = decomp_main -end if - -d1 = SIZE(dst,1) -d2 = SIZE(dst,2) -d3 = SIZE(dst,3) - -call NBC_WAIT(handle, ierror) - -! rearrange receive buffer -call mem_merge_yx_complex(rbuf, d1, d2, d3, dst, dims(1), & -decomp%x1dist, decomp) - -return -end subroutine transpose_y_to_x_complex_wait -#endif - - -! pack/unpack ALLTOALL(V) buffers - -subroutine mem_split_yx_real(in,n1,n2,n3,out,iproc,dist,decomp) - -implicit none - -integer, intent(IN) :: n1,n2,n3 -real(mytype), dimension(n1,n2,n3), intent(IN) :: in -real(mytype), dimension(*), intent(OUT) :: out -integer, intent(IN) :: iproc -integer, dimension(0:iproc-1), intent(IN) :: dist -TYPE(DECOMP_INFO), intent(IN) :: decomp - -integer :: i,j,k, m,i1,i2,pos - -do m=0,iproc-1 -if (m==0) then -i1 = 1 -i2 = dist(0) -else -i1 = i2+1 -i2 = i1+dist(m)-1 -end if - -#ifdef SHM -pos = decomp%y1disp_o(m) + 1 -#else -#ifdef EVEN -pos = m * decomp%y1count + 1 -#else -pos = decomp%y1disp(m) + 1 -#endif -#endif - -do k=1,n3 -do j=i1,i2 -do i=1,n1 -out(pos) = in(i,j,k) -pos = pos + 1 -end do -end do -end do -end do - -return -end subroutine mem_split_yx_real - - -subroutine mem_split_yx_complex(in,n1,n2,n3,out,iproc,dist,decomp) - -implicit none - -integer, intent(IN) :: n1,n2,n3 -complex(mytype), dimension(n1,n2,n3), intent(IN) :: in -complex(mytype), dimension(*), intent(OUT) :: out -integer, intent(IN) :: iproc -integer, dimension(0:iproc-1), intent(IN) :: dist -TYPE(DECOMP_INFO), intent(IN) :: decomp - -integer :: i,j,k, m,i1,i2,pos - -do m=0,iproc-1 -if (m==0) then -i1 = 1 -i2 = dist(0) -else -i1 = i2+1 -i2 = i1+dist(m)-1 -end if - -#ifdef SHM -pos = decomp%y1disp_o(m) + 1 -#else -#ifdef EVEN -pos = m * decomp%y1count + 1 -#else -pos = decomp%y1disp(m) + 1 -#endif -#endif - -do k=1,n3 -do j=i1,i2 -do i=1,n1 -out(pos) = in(i,j,k) -pos = pos + 1 -end do -end do -end do -end do - -return -end subroutine mem_split_yx_complex - - -subroutine mem_merge_yx_real(in,n1,n2,n3,out,iproc,dist,decomp) - -implicit none - -integer, intent(IN) :: n1,n2,n3 -real(mytype), dimension(*), intent(IN) :: in -real(mytype), dimension(n1,n2,n3), intent(OUT) :: out -integer, intent(IN) :: iproc -integer, dimension(0:iproc-1), intent(IN) :: dist -TYPE(DECOMP_INFO), intent(IN) :: decomp - -integer :: i,j,k, m,i1,i2, pos - -do m=0,iproc-1 -if (m==0) then -i1 = 1 -i2 = dist(0) -else -i1 = i2+1 -i2 = i1+dist(m)-1 -end if - -#ifdef SHM -pos = decomp%x1disp_o(m) + 1 -#else -#ifdef EVEN -pos = m * decomp%x1count + 1 -#else -pos = decomp%x1disp(m) + 1 -#endif -#endif - -do k=1,n3 -do j=1,n2 -do i=i1,i2 -out(i,j,k) = in(pos) -pos = pos + 1 -end do -end do -end do -end do - -return -end subroutine mem_merge_yx_real - - -subroutine mem_merge_yx_complex(in,n1,n2,n3,out,iproc,dist,decomp) - -implicit none - -integer, intent(IN) :: n1,n2,n3 -complex(mytype), dimension(*), intent(IN) :: in -complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out -integer, intent(IN) :: iproc -integer, dimension(0:iproc-1), intent(IN) :: dist -TYPE(DECOMP_INFO), intent(IN) :: decomp - -integer :: i,j,k, m,i1,i2, pos - -do m=0,iproc-1 -if (m==0) then -i1 = 1 -i2 = dist(0) -else -i1 = i2+1 -i2 = i1+dist(m)-1 -end if - -#ifdef SHM -pos = decomp%x1disp_o(m) + 1 -#else -#ifdef EVEN -pos = m * decomp%x1count + 1 -#else -pos = decomp%x1disp(m) + 1 -#endif -#endif - -do k=1,n3 -do j=1,n2 -do i=i1,i2 -out(i,j,k) = in(pos) -pos = pos + 1 -end do -end do -end do -end do - -return -end subroutine mem_merge_yx_complex diff --git a/decomp2d/transpose_y_to_z.inc b/decomp2d/transpose_y_to_z.inc deleted file mode 100644 index e6fbb14..0000000 --- a/decomp2d/transpose_y_to_z.inc +++ /dev/null @@ -1,524 +0,0 @@ -! -*- mode: f90 -*- -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This file contains the routines that transpose data from Y to Z pencil - -subroutine transpose_y_to_z_real(src, dst, opt_decomp) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: src - real(mytype), dimension(:,:,:), intent(OUT) :: dst - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM - real(mytype) :: work1(*), work2(*) - POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - - integer :: s1,s2,s3,d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - ! rearrange source array as send buffer -#ifdef SHM - work1_p = decomp%ROW_INFO%SND_P - call mem_split_yz_real(src, s1, s2, s3, work1, dims(2), & - decomp%y2dist, decomp) -#else - call mem_split_yz_real(src, s1, s2, s3, work1_r, dims(2), & - decomp%y2dist, decomp) -#endif - - ! define receive buffer -#ifdef SHM - work2_p = decomp%ROW_INFO%RCV_P - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) -#endif - -#ifdef SHM - if (decomp%ROW_INFO%CORE_ME==1) THEN - call MPI_ALLTOALLV(work1, decomp%y2cnts_s, decomp%y2disp_s, & - real_type, work2, decomp%z2cnts_s, decomp%z2disp_s, & - real_type, decomp%ROW_INFO%SMP_COMM, ierror) - end if -#else -#ifdef EVEN - if (decomp%even) then - call MPI_ALLTOALL(work1_r, decomp%y2count, & - real_type, dst, decomp%z2count, & - real_type, DECOMP_2D_COMM_ROW, ierror) - else - call MPI_ALLTOALL(work1_r, decomp%y2count, & - real_type, work2_r, decomp%z2count, & - real_type, DECOMP_2D_COMM_ROW, ierror) - end if -#else - call MPI_ALLTOALLV(work1_r, decomp%y2cnts, decomp%y2disp, & - real_type, dst, decomp%z2cnts, decomp%z2disp, & - real_type, DECOMP_2D_COMM_ROW, ierror) -#endif -#endif - - ! rearrange receive buffer -#ifdef SHM - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) - call mem_merge_yz_real(work2, d1, d2, d3, dst, dims(2), & - decomp%z2dist, decomp) -#else -#ifdef EVEN - if (.not. decomp%even) then - call mem_merge_yz_real(work2_r, d1, d2, d3, dst, dims(2), & - decomp%z2dist, decomp) - end if -#else - ! note the receive buffer is already in natural (i,j,k) order - ! so no merge operation needed -#endif -#endif - - return -end subroutine transpose_y_to_z_real - - -#ifdef OCC -subroutine transpose_y_to_z_real_start(handle, src, dst, sbuf, rbuf, & - opt_decomp) - - implicit none - - integer :: handle - real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: s1,s2,s3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - - ! rearrange source array as send buffer - call mem_split_yz_real(src, s1, s2, s3, sbuf, dims(2), & - decomp%y2dist, decomp) - -#ifdef EVEN - call NBC_IALLTOALL(sbuf, decomp%y2count, real_type, & - rbuf, decomp%z2count, real_type, & - DECOMP_2D_COMM_ROW, handle, ierror) -#else - call NBC_IALLTOALLV(sbuf, decomp%y2cnts, decomp%y2disp, real_type, & - rbuf, decomp%z2cnts, decomp%z2disp, real_type, & - DECOMP_2D_COMM_ROW, handle, ierror) -#endif - - return -end subroutine transpose_y_to_z_real_start - - -subroutine transpose_y_to_z_real_wait(handle, src, dst, sbuf, rbuf, & - opt_decomp) - - implicit none - - integer :: handle - real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - call NBC_WAIT(handle, ierror) - - dst = rbuf - - return -end subroutine transpose_y_to_z_real_wait -#endif - - -subroutine transpose_y_to_z_complex(src, dst, opt_decomp) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: src - complex(mytype), dimension(:,:,:), intent(OUT) :: dst - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM - complex(mytype) :: work1(*), work2(*) - POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - - integer :: s1,s2,s3,d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - ! rearrange source array as send buffer -#ifdef SHM - work1_p = decomp%ROW_INFO%SND_P_c - call mem_split_yz_complex(src, s1, s2, s3, work1, dims(2), & - decomp%y2dist, decomp) -#else - call mem_split_yz_complex(src, s1, s2, s3, work1_c, dims(2), & - decomp%y2dist, decomp) -#endif - - ! define receive buffer -#ifdef SHM - work2_p = decomp%ROW_INFO%RCV_P_c - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) -#endif - -#ifdef SHM - if (decomp%ROW_INFO%CORE_ME==1) THEN - call MPI_ALLTOALLV(work1, decomp%y2cnts_s, decomp%y2disp_s, & - complex_type, work2, decomp%z2cnts_s, decomp%z2disp_s, & - complex_type, decomp%ROW_INFO%SMP_COMM, ierror) - end if -#else -#ifdef EVEN - if (decomp%even) then - call MPI_ALLTOALL(work1_c, decomp%y2count, & - complex_type, dst, decomp%z2count, & - complex_type, DECOMP_2D_COMM_ROW, ierror) - else - call MPI_ALLTOALL(work1_c, decomp%y2count, & - complex_type, work2_c, decomp%z2count, & - complex_type, DECOMP_2D_COMM_ROW, ierror) - end if -#else - call MPI_ALLTOALLV(work1_c, decomp%y2cnts, decomp%y2disp, & - complex_type, dst, decomp%z2cnts, decomp%z2disp, & - complex_type, DECOMP_2D_COMM_ROW, ierror) -#endif -#endif - - ! rearrange receive buffer -#ifdef SHM - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) - call mem_merge_yz_complex(work2, d1, d2, d3, dst, dims(2), & - decomp%z2dist, decomp) -#else -#ifdef EVEN - if (.not. decomp%even) then - call mem_merge_yz_complex(work2_c, d1, d2, d3, dst, dims(2), & - decomp%z2dist, decomp) - end if -#else - ! note the receive buffer is already in natural (i,j,k) order - ! so no merge operation needed -#endif -#endif - - return -end subroutine transpose_y_to_z_complex - - -#ifdef OCC -subroutine transpose_y_to_z_complex_start(handle, src, dst, sbuf, & - rbuf, opt_decomp) - - implicit none - - integer :: handle - complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: s1,s2,s3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - - ! rearrange source array as send buffer - call mem_split_yz_complex(src, s1, s2, s3, sbuf, dims(2), & - decomp%y2dist, decomp) - -#ifdef EVEN - call NBC_IALLTOALL(sbuf, decomp%y2count, & - complex_type, rbuf, decomp%z2count, & - complex_type, DECOMP_2D_COMM_ROW, handle, ierror) -#else - call NBC_IALLTOALLV(sbuf, decomp%y2cnts, decomp%y2disp, & - complex_type, rbuf,decomp%z2cnts, decomp%z2disp, & - complex_type, DECOMP_2D_COMM_ROW, handle, ierror) -#endif - - return -end subroutine transpose_y_to_z_complex_start - - -subroutine transpose_y_to_z_complex_wait(handle, src, dst, sbuf, & - rbuf, opt_decomp) - - implicit none - - integer :: handle - complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - call NBC_WAIT(handle, ierror) - - dst = rbuf - - return -end subroutine transpose_y_to_z_complex_wait -#endif - - -! pack/unpack ALLTOALL(V) buffers - -subroutine mem_split_yz_real(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - real(mytype), dimension(n1,n2,n3), intent(IN) :: in - real(mytype), dimension(*), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%y2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%y2count + 1 -#else - pos = decomp%y2disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(pos) = in(i,j,k) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_split_yz_real - - -subroutine mem_split_yz_complex(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - complex(mytype), dimension(n1,n2,n3), intent(IN) :: in - complex(mytype), dimension(*), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%y2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%y2count + 1 -#else - pos = decomp%y2disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(pos) = in(i,j,k) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_split_yz_complex - - -subroutine mem_merge_yz_real(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - real(mytype), dimension(*), intent(IN) :: in - real(mytype), dimension(n1,n2,n3), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2, pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%z2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%z2count + 1 -#else - pos = decomp%z2disp(m) + 1 -#endif -#endif - - do k=i1,i2 - do j=1,n2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_merge_yz_real - - -subroutine mem_merge_yz_complex(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - complex(mytype), dimension(*), intent(IN) :: in - complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2, pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%z2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%z2count + 1 -#else - pos = decomp%z2disp(m) + 1 -#endif -#endif - - do k=i1,i2 - do j=1,n2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_merge_yz_complex diff --git a/decomp2d/transpose_z_to_y.inc b/decomp2d/transpose_z_to_y.inc deleted file mode 100644 index 22c6f11..0000000 --- a/decomp2d/transpose_z_to_y.inc +++ /dev/null @@ -1,524 +0,0 @@ -! -*- mode: f90 -*- -!======================================================================= -! This is part of the 2DECOMP&FFT library -! -! 2DECOMP&FFT is a software framework for general-purpose 2D (pencil) -! decomposition. It also implements a highly scalable distributed -! three-dimensional Fast Fourier Transform (FFT). -! -! Copyright (C) 2009-2011 Ning Li, the Numerical Algorithms Group (NAG) -! -!======================================================================= - -! This file contains the routines that transpose data from Z to Y pencil - -subroutine transpose_z_to_y_real(src, dst, opt_decomp) - - implicit none - - real(mytype), dimension(:,:,:), intent(IN) :: src - real(mytype), dimension(:,:,:), intent(OUT) :: dst - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM - real(mytype) :: work1(*), work2(*) - POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - - integer :: s1,s2,s3,d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - ! rearrange source array as send buffer -#ifdef SHM - work1_p = decomp%ROW_INFO%SND_P - call mem_split_zy_real(src, s1, s2, s3, work1, dims(2), & - decomp%z2dist, decomp) -#else -#ifdef EVEN - if (.not. decomp%even) then - call mem_split_zy_real(src, s1, s2, s3, work1_r, dims(2), & - decomp%z2dist, decomp) - end if -#else - ! note the src array is suitable to be a send buffer - ! so no split operation needed -#endif -#endif - - ! define receive buffer -#ifdef SHM - work2_p = decomp%ROW_INFO%RCV_P - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) -#endif - -#ifdef SHM - if (decomp%ROW_INFO%CORE_ME==1) THEN - call MPI_ALLTOALLV(work1, decomp%z2cnts_s, decomp%z2disp_s, & - real_type, work2, decomp%y2cnts_s, decomp%y2disp_s, & - real_type, decomp%ROW_INFO%SMP_COMM, ierror) - end if -#else -#ifdef EVEN - if (decomp%even) then - call MPI_ALLTOALL(src, decomp%z2count, & - real_type, work2_r, decomp%y2count, & - real_type, DECOMP_2D_COMM_ROW, ierror) - else - call MPI_ALLTOALL(work1_r, decomp%z2count, & - real_type, work2_r, decomp%y2count, & - real_type, DECOMP_2D_COMM_ROW, ierror) - end if -#else - call MPI_ALLTOALLV(src, decomp%z2cnts, decomp%z2disp, & - real_type, work2_r, decomp%y2cnts, decomp%y2disp, & - real_type, DECOMP_2D_COMM_ROW, ierror) -#endif -#endif - - ! rearrange receive buffer -#ifdef SHM - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) - call mem_merge_zy_real(work2, d1, d2, d3, dst, dims(2), & - decomp%y2dist, decomp) -#else - call mem_merge_zy_real(work2_r, d1, d2, d3, dst, dims(2), & - decomp%y2dist, decomp) -#endif - - return -end subroutine transpose_z_to_y_real - - -#ifdef OCC -subroutine transpose_z_to_y_real_start(handle, src, dst, sbuf, rbuf, & - opt_decomp) - - implicit none - - integer :: handle - real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - sbuf = src - -#ifdef EVEN - call NBC_IALLTOALL(sbuf, decomp%z2count, real_type, & - rbuf, decomp%y2count, real_type, & - DECOMP_2D_COMM_ROW, handle, ierror) -#else - call NBC_IALLTOALLV(sbuf, decomp%z2cnts, decomp%z2disp, real_type, & - rbuf, decomp%y2cnts, decomp%y2disp, real_type, & - DECOMP_2D_COMM_ROW, handle, ierror) -#endif - - return -end subroutine transpose_z_to_y_real_start - - -subroutine transpose_z_to_y_real_wait(handle, src, dst, sbuf, rbuf, & - opt_decomp) - - implicit none - - integer :: handle - real(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - call NBC_WAIT(handle, ierror) - - ! rearrange receive buffer - call mem_merge_zy_real(rbuf, d1, d2, d3, dst, dims(2), & - decomp%y2dist, decomp) - - return -end subroutine transpose_z_to_y_real_wait -#endif - - -subroutine transpose_z_to_y_complex(src, dst, opt_decomp) - - implicit none - - complex(mytype), dimension(:,:,:), intent(IN) :: src - complex(mytype), dimension(:,:,:), intent(OUT) :: dst - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - -#ifdef SHM - complex(mytype) :: work1(*), work2(*) - POINTER (work1_p, work1), (work2_p, work2) ! Cray pointers -#endif - - integer :: s1,s2,s3,d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - s1 = SIZE(src,1) - s2 = SIZE(src,2) - s3 = SIZE(src,3) - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - ! rearrange source array as send buffer -#ifdef SHM - work1_p = decomp%ROW_INFO%SND_P_c - call mem_split_zy_complex(src, s1, s2, s3, work1, dims(2), & - decomp%z2dist, decomp) -#else -#ifdef EVEN - if (.not. decomp%even) then - call mem_split_zy_complex(src, s1, s2, s3, work1_c, dims(2), & - decomp%z2dist, decomp) - end if -#else - ! note the src array is suitable to be a send buffer - ! so no split operation needed -#endif -#endif - - ! define receive buffer -#ifdef SHM - work2_p = decomp%ROW_INFO%RCV_P_c - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) -#endif - -#ifdef SHM - if (decomp%ROW_INFO%CORE_ME==1) THEN - call MPI_ALLTOALLV(work1, decomp%z2cnts_s, decomp%z2disp_s, & - complex_type, work2, decomp%y2cnts_s, decomp%y2disp_s, & - complex_type, decomp%ROW_INFO%SMP_COMM, ierror) - end if -#else -#ifdef EVEN - if (decomp%even) then - call MPI_ALLTOALL(src, decomp%z2count, & - complex_type, work2_c, decomp%y2count, & - complex_type, DECOMP_2D_COMM_ROW, ierror) - else - call MPI_ALLTOALL(work1_c, decomp%z2count, & - complex_type, work2_c, decomp%y2count, & - complex_type, DECOMP_2D_COMM_ROW, ierror) - end if -#else - call MPI_ALLTOALLV(src, decomp%z2cnts, decomp%z2disp, & - complex_type, work2_c, decomp%y2cnts, decomp%y2disp, & - complex_type, DECOMP_2D_COMM_ROW, ierror) -#endif -#endif - - ! rearrange receive buffer -#ifdef SHM - call MPI_BARRIER(decomp%ROW_INFO%CORE_COMM, ierror) - call mem_merge_zy_complex(work2, d1, d2, d3, dst, dims(2), & - decomp%y2dist, decomp) -#else - call mem_merge_zy_complex(work2_c, d1, d2, d3, dst, dims(2), & - decomp%y2dist, decomp) -#endif - - return -end subroutine transpose_z_to_y_complex - - -#ifdef OCC -subroutine transpose_z_to_y_complex_start(handle, src, dst, sbuf, & - rbuf, opt_decomp) - - implicit none - - integer :: handle - complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - sbuf = src - -#ifdef EVEN - call NBC_IALLTOALL(sbuf, decomp%z2count, & - complex_type, rbuf, decomp%y2count, & - complex_type, DECOMP_2D_COMM_ROW, handle, ierror) -#else - call NBC_IALLTOALLV(sbuf, decomp%z2cnts, decomp%z2disp, & - complex_type, rbuf, decomp%y2cnts, decomp%y2disp, & - complex_type, DECOMP_2D_COMM_ROW, handle, ierror) -#endif - - return -end subroutine transpose_z_to_y_complex_start - - -subroutine transpose_z_to_y_complex_wait(handle, src, dst, sbuf, & - rbuf, opt_decomp) - - implicit none - - integer :: handle - complex(mytype), dimension(:,:,:) :: src, dst, sbuf, rbuf - TYPE(DECOMP_INFO), intent(IN), optional :: opt_decomp - - TYPE(DECOMP_INFO) :: decomp - - integer :: d1,d2,d3 - integer :: ierror - - if (present(opt_decomp)) then - decomp = opt_decomp - else - decomp = decomp_main - end if - - d1 = SIZE(dst,1) - d2 = SIZE(dst,2) - d3 = SIZE(dst,3) - - call NBC_WAIT(handle, ierror) - - ! rearrange receive buffer - call mem_merge_zy_complex(rbuf, d1, d2, d3, dst, dims(2), & - decomp%y2dist, decomp) - - return -end subroutine transpose_z_to_y_complex_wait -#endif - - -! pack/unpack ALLTOALL(V) buffers - -subroutine mem_split_zy_real(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - real(mytype), dimension(n1,n2,n3), intent(IN) :: in - real(mytype), dimension(*), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%z2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%z2count + 1 -#else - pos = decomp%z2disp(m) + 1 -#endif -#endif - - do k=i1,i2 - do j=1,n2 - do i=1,n1 - out(pos) = in(i,j,k) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_split_zy_real - - -subroutine mem_split_zy_complex(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - complex(mytype), dimension(n1,n2,n3), intent(IN) :: in - complex(mytype), dimension(*), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2,pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%z2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%z2count + 1 -#else - pos = decomp%z2disp(m) + 1 -#endif -#endif - - do k=i1,i2 - do j=1,n2 - do i=1,n1 - out(pos) = in(i,j,k) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_split_zy_complex - - -subroutine mem_merge_zy_real(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - real(mytype), dimension(*), intent(IN) :: in - real(mytype), dimension(n1,n2,n3), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2, pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%y2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%y2count + 1 -#else - pos = decomp%y2disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_merge_zy_real - - -subroutine mem_merge_zy_complex(in,n1,n2,n3,out,iproc,dist,decomp) - - implicit none - - integer, intent(IN) :: n1,n2,n3 - complex(mytype), dimension(*), intent(IN) :: in - complex(mytype), dimension(n1,n2,n3), intent(OUT) :: out - integer, intent(IN) :: iproc - integer, dimension(0:iproc-1), intent(IN) :: dist - TYPE(DECOMP_INFO), intent(IN) :: decomp - - integer :: i,j,k, m,i1,i2, pos - - do m=0,iproc-1 - if (m==0) then - i1 = 1 - i2 = dist(0) - else - i1 = i2+1 - i2 = i1+dist(m)-1 - end if - -#ifdef SHM - pos = decomp%y2disp_o(m) + 1 -#else -#ifdef EVEN - pos = m * decomp%y2count + 1 -#else - pos = decomp%y2disp(m) + 1 -#endif -#endif - - do k=1,n3 - do j=i1,i2 - do i=1,n1 - out(i,j,k) = in(pos) - pos = pos + 1 - end do - end do - end do - end do - - return -end subroutine mem_merge_zy_complex diff --git a/src/case.f90 b/src/case.f90 index 75a106e..41ace93 100644 --- a/src/case.f90 +++ b/src/case.f90 @@ -8,6 +8,7 @@ module case use decomp_2d use variables + use var, only : ph1 use var, only : nzmsize implicit none diff --git a/src/navier.f90 b/src/navier.f90 index d09dc69..c1a50b1 100644 --- a/src/navier.f90 +++ b/src/navier.f90 @@ -4,6 +4,8 @@ module navier + USE var, ONLY : ph1, ph2, ph3, ph4 + implicit none private @@ -21,7 +23,7 @@ module navier !############################################################################ SUBROUTINE solve_poisson(pp3, px1, py1, pz1, ux1, uy1, uz1) - USE decomp_2d, ONLY : mytype, xsize, zsize, ph1 + USE decomp_2d, ONLY : mytype, xsize, zsize USE decomp_2d_poisson, ONLY : poisson USE var, ONLY : nzmsize USE var, ONLY : dv3 diff --git a/src/variables.f90 b/src/variables.f90 index cd4b435..3fd155d 100644 --- a/src/variables.f90 +++ b/src/variables.f90 @@ -9,6 +9,11 @@ module var USE param USE complex_geometry + implicit none + + ! Grids + type(decomp_info) :: ph1, ph2, ph3, ph4, phG + ! define all major arrays here real(mytype), save, allocatable, dimension(:,:,:) :: ux1, ux2, ux3, po3, dv3 real(mytype), save, allocatable, dimension(:,:,:,:) :: pp3 @@ -24,26 +29,6 @@ module var real(mytype), save, allocatable, dimension(:,:,:,:,:) :: dphi1 real(mytype), save, allocatable, dimension(:,:,:) :: mu1,mu2,mu3 - !arrays for post processing - real(mytype), save, allocatable, dimension(:,:,:) :: f1,fm1 - real(mytype), save, allocatable, dimension(:,:,:) :: uxm1, uym1, phim1, prem1, dissm1 - real(mytype), save, allocatable, dimension(:,:,:) :: uxm2, uym2, phim2, prem2, dissm2 - - !arrays for statistic collection - real(mytype), save, allocatable, dimension(:,:,:) :: umean,vmean,wmean,pmean,uumean,vvmean,wwmean,uvmean,uwmean,vwmean,tmean - real(mytype), save, allocatable, dimension(:,:,:,:) :: phimean,phiphimean,uphimean,vphimean,wphimean - real(mytype), save, allocatable, dimension(:,:,:) :: tik1,tik2,tak1,tak2 - real(mytype), save, allocatable, dimension(:,:,:) :: u1sum_tik,u1sum_tak - real(mytype), save, allocatable, dimension(:,:,:) :: u1sum,v1sum,w1sum,u2sum,v2sum,w2sum - real(mytype), save, allocatable, dimension(:,:,:) :: u3sum,v3sum,w3sum,u4sum,v4sum,w4sum - real(mytype), save, allocatable, dimension(:,:,:) :: uvsum,uwsum,vwsum,disssum,presum,tsum - - !arrays for extra statistics collection - real(mytype), save, allocatable, dimension(:,:,:) :: dudxsum,utmapsum - - !arrays for visualization - real(mytype), save, allocatable, dimension(:,:,:) :: uvisu - ! define all work arrays here real(mytype), save, allocatable, dimension(:,:,:) :: ta1,tb1,tc1,td1,& te1,tf1,tg1,th1,ti1,di1 @@ -80,6 +65,8 @@ subroutine init_variables TYPE(DECOMP_INFO), save :: ph! decomposition object + integer :: i, j, k + #ifdef DEBG if (nrank .eq. 0) print *,'# init_variables start' #endif @@ -105,7 +92,15 @@ subroutine init_variables !xsize(i), ysize(i), zsize(i), i=1,2,3 - sizes of the sub-domains held by the current process. The first letter refers to the pencil orientation and the three 1D array elements contain the sub-domain sizes in X, Y and Z directions, respectively. In a 2D pencil decomposition, there is always one dimension which completely resides in local memory. So by definition xsize(1)==nx_global, ysize(2)==ny_global and zsize(3)==nz_global. !xstart(i), ystart(i), zstart(i), xend(i), yend(i), zend(i), i=1,2,3 - the starting and ending indices for each sub-domain, as in the global coordinate system. Obviously, it can be seen that xsize(i)=xend(i)-xstart(i)+1. It may be convenient for certain applications to use global coordinate (for example when extracting a 2D plane from a 3D domain, it is easier to know which process owns the plane if global index is used). + + !div: nx ny nz --> nxm ny nz --> nxm nym nz --> nxm nym nzm + call decomp_info_init(nxm, nym, nzm, ph1) + call decomp_info_init(nxm, ny, nz, ph4) + !gradp: nxm nym nzm -> nxm nym nz --> nxm ny nz --> nx ny nz + call decomp_info_init(nxm, ny, nz, ph2) + call decomp_info_init(nxm, nym, nz, ph3) + call decomp_info_init(nxm,nym,nzm,phG) ! XXX: Why does this exist? !X PENCILS call alloc_x(ux1, opt_global=.true.) !global indices @@ -177,25 +172,6 @@ subroutine init_variables dpdxzn=zero dpdyzn=zero - !arrays for visualization!pay attention to the size! - allocate(uvisu(xstV(1):xenV(1),xstV(2):xenV(2),xstV(3):xenV(3))) - - !arrays statistics - allocate (umean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (vmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (wmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (pmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (uumean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (vvmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (wwmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (uvmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (uwmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (vwmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - allocate (phimean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3),numscalar)) - allocate (phiphimean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3),numscalar)) - allocate (tmean(xstS(1):xenS(1),xstS(2):xenS(2),xstS(3):xenS(3))) - - !Y PENCILS call alloc_y(ux2);call alloc_y(uy2);call alloc_y(uz2) call alloc_y(ta2);call alloc_y(tb2);call alloc_y(tc2) @@ -493,4 +469,6 @@ subroutine init_variables #endif return end subroutine init_variables + + end module var diff --git a/src/xcompact3d.f90 b/src/xcompact3d.f90 index 577f53e..6f2a998 100644 --- a/src/xcompact3d.f90 +++ b/src/xcompact3d.f90 @@ -146,22 +146,12 @@ subroutine init_xcompact3d(trun) call parameter() call decomp_2d_init(nx,ny,nz,p_row,p_col) - call init_coarser_mesh_statS(nstat,nstat,nstat,.true.) !start from 1 == true - call init_coarser_mesh_statV(nvisu,nvisu,nvisu,.true.) !start from 1 == true - call init_coarser_mesh_statP(nprobe,nprobe,nprobe,.true.) !start from 1 == true - !div: nx ny nz --> nxm ny nz --> nxm nym nz --> nxm nym nzm - call decomp_info_init(nxm, nym, nzm, ph1) - call decomp_info_init(nxm, ny, nz, ph4) - !gradp: nxm nym nzm -> nxm nym nz --> nxm ny nz --> nx ny nz - call decomp_info_init(nxm, ny, nz, ph2) - call decomp_info_init(nxm, nym, nz, ph3) call init_variables() call schemes() call decomp_2d_poisson_init() - call decomp_info_init(nxm,nym,nzm,phG) call init_flowfield()