@@ -39,20 +39,24 @@ MODULE TSSubs
3939! ! real array) of the simulated velocity (wind/water speed). It returns
4040! ! values FOR ONLY the velocity components that use the IEC method for
4141! ! computing spatial coherence; i.e., for i where SCMod(i) == CohMod_IEC
42- SUBROUTINE CalcFourierCoeffs_IEC ( p , U , PhaseAngles , S , V , TRH , ErrStat , ErrMsg )
42+ ! !
43+ ! ! OpenMP: This makes a copy of the TRH array for each thread to use, which
44+ ! ! is a little inefficient, but the speedup from parallelization
45+ ! ! should outweigh the memory overhead. In the single threaded case,
46+ ! ! a single copy is made, which is relatively negligible.
47+ SUBROUTINE CalcFourierCoeffs_IEC ( p , U , PhaseAngles , S , V , TRH_in , ErrStat , ErrMsg )
4348
4449TYPE (TurbSim_ParameterType), INTENT (IN ) :: p ! < TurbSim parameters
4550REAL (ReKi), INTENT (IN ) :: U (:) ! < The steady u-component wind speeds for the grid (NPoints).
4651REAL (ReKi), INTENT (IN ) :: PhaseAngles (:,:,:) ! < The array that holds the random phases [number of points, number of frequencies, number of wind components=3].
4752REAL (ReKi), INTENT (IN ) :: S (:,:,:) ! < The turbulence PSD array (NumFreq,NPoints,3).
4853REAL (ReKi), INTENT (INOUT ) :: V (:,:,:) ! < An array containing the summations of the rows of H (NumSteps,NPoints,3).
49- REAL (ReKi), INTENT (INOUT ) :: TRH (:) ! < The transfer function matrix. just used as a work array
54+ REAL (ReKi), INTENT (INOUT ) :: TRH_in (:) ! < The transfer function matrix. just used as a work array
5055INTEGER (IntKi), INTENT (OUT ) :: ErrStat
5156CHARACTER (* ), INTENT (OUT ) :: ErrMsg
5257
53-
54- ! Internal variables
55-
58+ ! $OMP THREADPRIVATE(TRH)
59+ REAL (ReKi), allocatable , save :: TRH(:) ! Each OMP thread gets its own copy of this array
5660REAL (ReKi), ALLOCATABLE :: Dist(:) ! The distance between points
5761REAL (ReKi), ALLOCATABLE :: DistU(:)
5862
@@ -64,25 +68,22 @@ SUBROUTINE CalcFourierCoeffs_IEC( p, U, PhaseAngles, S, V, TRH, ErrStat, ErrMsg
6468
6569INTEGER (IntKi) :: ErrStat2
6670CHARACTER (MaxMsgLen) :: ErrMsg2
67-
68-
69-
71+
7072 ErrStat = ErrID_None
7173 ErrMsg = " "
7274
73- IF (.NOT. ANY (p% met% SCMod == CohMod_IEC) ) RETURN
75+ IF (.NOT. ANY (p% met% SCMod == CohMod_IEC)) RETURN
7476
7577 !- -------------------------------------------------------------------------------
7678 ! allocate arrays
7779 !- -------------------------------------------------------------------------------
78- CALL AllocAry( Dist, p% grid% NPacked, ' Dist coherence array' , ErrStat2, ErrMsg2 ); CALL SetErrStat(ErrStat2, ErrMsg2, ErrStat, ErrMsg, ' CalcFourierCoeffs_IEC' )
79- CALL AllocAry( DistU, p% grid% NPacked, ' DistU coherence array' , ErrStat2, ErrMsg2 ); CALL SetErrStat(ErrStat2, ErrMsg2, ErrStat, ErrMsg, ' CalcFourierCoeffs_IEC' )
80- IF (ErrStat >= AbortErrLev) THEN
81- CALL Cleanup()
82- RETURN
83- END IF
84-
85-
80+
81+ CALL AllocAry( Dist, p% grid% NPacked, ' Dist coherence array' , ErrStat2, ErrMsg2 ); CALL SetErrStat(ErrStat2, ErrMsg2, ErrStat, ErrMsg, ' CalcFourierCoeffs_IEC' )
82+ CALL AllocAry( DistU, p% grid% NPacked, ' DistU coherence array' , ErrStat2, ErrMsg2 ); CALL SetErrStat(ErrStat2, ErrMsg2, ErrStat, ErrMsg, ' CalcFourierCoeffs_IEC' )
83+ IF (ErrStat >= AbortErrLev) RETURN
84+
85+ TRH = TRH_in ! point the PRIVATE array to the passed in array for single thread case
86+
8687 !- -------------------------------------------------------------------------------
8788 ! Calculate the distances and other parameters that don't change with frequency
8889 !- --------------------------------------------------------------------------------
@@ -115,7 +116,13 @@ SUBROUTINE CalcFourierCoeffs_IEC( p, U, PhaseAngles, S, V, TRH, ErrStat, ErrMsg
115116 ! Calculate the coherence, Veers' H matrix (CSDs), and the fourier coefficients
116117 !- --------------------------------------------------------------------------------
117118
118- DO IFREQ = 1 ,p% grid% NumFreq
119+ ! $OMP PARALLEL DO &
120+ ! $OMP DEFAULT(None) &
121+ ! $OMP SHARED(p, PhaseAngles, S, V, Dist, DistU, IVec, ErrStat, ErrMsg, AbortErrLev) &
122+ ! $OMP PRIVATE(Indx, I, J, ErrStat2, ErrMsg2) &
123+ ! $OMP COPYIN(TRH)
124+ DO IFREQ = 1 , p% grid% NumFreq
125+
119126 ! -----------------------------------------------
120127 ! Create the coherence matrix for this frequency
121128 ! -----------------------------------------------
@@ -149,27 +156,18 @@ SUBROUTINE CalcFourierCoeffs_IEC( p, U, PhaseAngles, S, V, TRH, ErrStat, ErrMsg
149156 ! use H matrix to calculate coefficients
150157 ! -----------------------------------------------
151158
152- CALL Coh2H( p, IVec, IFreq, TRH, S, ErrStat2, ErrMsg2 )
159+ CALL Coh2H(p, IVec, IFreq, TRH, S, ErrStat2, ErrMsg2)
160+ if (ErrStat2 >= AbortErrLev) then
161+ ! $OMP CRITICAL
153162 CALL SetErrStat(ErrStat2, ErrMsg2, ErrStat, ErrMsg, ' CalcFourierCoeffs_IEC' )
154- IF (ErrStat >= AbortErrLev) THEN
155- CALL Cleanup()
156- RETURN
157- END IF
158- CALL H2Coeffs( IVec, IFreq, TRH, PhaseAngles, V, p% grid% NPoints )
159- END DO ! IFreq
163+ ! $OMP END CRITICAL
164+ else
165+ CALL H2Coeffs( IVec, IFreq, TRH, PhaseAngles, V, p% grid% NPoints )
166+ endif
160167
168+ END DO ! IFreq
161169 END DO ! IVec
162170
163- CALL Cleanup()
164- RETURN
165-
166- ! ............................................
167- CONTAINS
168- SUBROUTINE Cleanup ()
169-
170- IF ( ALLOCATED ( Dist ) ) DEALLOCATE ( Dist )
171- IF ( ALLOCATED ( DistU ) ) DEALLOCATE ( DistU )
172- END SUBROUTINE Cleanup
173171! ............................................
174172END SUBROUTINE CalcFourierCoeffs_IEC
175173! =======================================================================
@@ -720,18 +718,17 @@ SUBROUTINE EyeCoh2H( IVec, IFreq, TRH, S, NPoints )
720718 ! -----------------------------------------------------------------------------------
721719
722720 Indx = 1
721+ TRH = 0.0
723722 DO J = 1 ,NPoints ! The column number
724723
725724 ! The diagonal entries of the matrix:
726725
727726 TRH(Indx) = SQRT ( ABS ( S(IFreq,J,IVec) ) )
728727
729- ! The off-diagonal values:
730- Indx = Indx + 1
731- DO I = J+1 ,NPoints ! The row number
732- TRH(Indx) = 0.0
733- Indx = Indx + 1
734- ENDDO ! I
728+ ! skip rest of row (NPoints-1) -- these are off diagonal elements that are zero.
729+ ! Then add 1 to get to next diagonal entry
730+ Indx = Indx + (NPoints - J) + 1
731+
735732 ENDDO ! J
736733
737734END SUBROUTINE EyeCoh2H
@@ -751,8 +748,9 @@ SUBROUTINE Coh2H( p, IVec, IFreq, TRH, S, ErrStat, ErrMsg )
751748
752749
753750integer :: Indx, J, I, NPts
754-
751+ integer :: old_max_levels ! maximum nesting levels for OPENMP
755752
753+
756754 ! -------------------------------------------------------------
757755 ! Calculate the Cholesky factorization for the coherence matrix
758756 ! -------------------------------------------------------------
@@ -872,115 +870,81 @@ END SUBROUTINE H2Coeffs
872870! > This routine takes the Fourier coefficients and converts them to velocity
873871! ! note that the resulting time series has zero mean.
874872SUBROUTINE Coeffs2TimeSeries ( V , NumSteps , NPoints , NUsrPoints , ErrStat , ErrMsg )
875-
876-
877- ! USE NWTC_FFTPACK
878-
879- IMPLICIT NONE
880-
881-
882- ! passed variables
883873 INTEGER (IntKi), INTENT (IN ) :: NumSteps ! < Size of dimension 1 of V (number of time steps)
884874 INTEGER (IntKi), INTENT (IN ) :: NPoints ! < Size of dimension 2 of V (number of grid points)
885875 INTEGER (IntKi), INTENT (IN ) :: NUsrPoints ! < number of user-defined time series
886-
887876 REAL (ReKi), INTENT (INOUT ) :: V (NumSteps,NPoints,3 ) ! < An array containing the summations of the rows of H (NumSteps,NPoints,3).
888-
889877 INTEGER (IntKi), intent ( out ) :: ErrStat ! < Error level
890878 CHARACTER (* ), intent ( out ) :: ErrMsg ! < Message describing error
891879
892-
893- ! local variables
894880 TYPE (FFT_DataType) :: FFT_Data ! data for applying FFT
895881 REAL (SiKi), ALLOCATABLE :: Work ( : ) ! working array to hold coefficients of fft !bjj: made it allocatable so it doesn't take stack space
896-
897-
898- INTEGER (IntKi) :: ITime ! loop counter for time step/frequency
899882 INTEGER (IntKi) :: IVec ! loop counter for velocity components
900883 INTEGER (IntKi) :: IPoint ! loop counter for grid points
901-
884+ logical :: ExitOMPlooping ! flag to indicate skipping other loops
902885 INTEGER (IntKi) :: ErrStat2 ! Error level (local)
903- ! CHARACTER(MaxMsgLen) :: ErrMsg2 ! Message describing error (local)
904886
905887
906- ! initialize variables
907-
908- ! ErrStat = ErrID_None
909- ! ErrMsg = ""
910-
911888 CALL AllocAry(Work, NumSteps, ' Work' ,ErrStat,ErrMsg)
912889 if (ErrStat >= AbortErrLev) return
913890
914- ! Allocate the FFT working storage and initialize its variables
915-
916- CALL InitFFT( NumSteps, FFT_Data, ErrStat= ErrStat2 )
917- CALL SetErrStat(ErrStat2, ' Error in InitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
918- IF (ErrStat >= AbortErrLev) THEN
919- CALL Cleanup()
920- RETURN
921- END IF
922-
923-
924891 ! Get the stationary-point time series.
925892
926- CALL WrScr ( ' Generating time series for all points:' )
893+ CALL WrScr ( ' Generating time series for all points:' )
927894
928- DO IVec= 1 ,3
895+ ! Since we are potentially using OpenMP here, we cannot
896+ ExitOMPlooping = .false.
929897
930- CALL WrScr ( ' ' // Comp( IVec) // ' -component ' )
898+ DO IVec= 1 , 3
931899
932- DO IPoint= 1 ,NPoints ! NTotB
900+ ! make sure we didn't have a failure on prior OMP loop
901+ if (ExitOMPlooping) cycle
933902
934- ! Overwrite the first point with zero. This sets the real (and
935- ! imaginary) part of the steady-state value to zero so that we
936- ! can add in the mean value later.
903+ CALL WrScr ( ' ' // Comp(IVec)// ' -component' )
937904
938- Work(1 ) = 0.0_ReKi
905+ ! The FFT_Data is not thread safe with the allocation inside.
906+ CALL InitFFT( NumSteps, FFT_Data, ErrStat= ErrStat2 )
907+ CALL SetErrStat(ErrStat2, ' Error in InitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
939908
940- ! DO ITime = 2,NumSteps-1
941- DO ITime = 2 ,NumSteps
942- Work(ITime) = V(ITime-1 , IPoint, IVec)
943- ENDDO ! ITime
944-
945- IF (iPoint > NUsrPoints) THEN
946- ! BJJ: we can't override this for the user-input spectra or we don't get the correct time series out.
947- ! Per JMJ, I will keep this here for the other points, but I personally think it could be skipped, too.
909+ ! Proceed only if the InitFFT worked.
910+ ! NOTE: this is to allow for OpenMP - can't return from inside loop
911+ if (ErrStat2 < AbortErrLev) then ! check ErrStat2 for this OMPthread
912+ DO IPoint= 1 ,NPoints ! NTotB
913+ ! Overwrite the first point with zero. This sets the real (and
914+ ! imaginary) part of the steady-state value to zero so that we
915+ ! can add in the mean value later.
916+ Work(1 ) = 0.0_ReKi
917+ Work(2 :NumSteps) = V(1 :NumSteps-1 , IPoint, IVec)
948918
949- ! Now, let's add a complex zero to the end to set the power in the Nyquist
950- ! frequency to zero.
919+ IF (iPoint > NUsrPoints) THEN
920+ ! BJJ: we can't override this for the user-input spectra or we don't get the correct time series out.
921+ ! Per JMJ, I will keep this here for the other points, but I personally think it could be skipped, too.
922+
923+ ! Now, let's add a complex zero to the end to set the power in the Nyquist
924+ ! frequency to zero.
925+ Work(NumSteps) = 0.0
926+ END IF
951927
952- Work(NumSteps) = 0.0
953- END IF
954-
928+ ! perform FFT
929+ CALL ApplyFFT( Work, FFT_Data, ErrStat2 )
930+ IF (ErrStat2 /= ErrID_None ) THEN
931+ CALL SetErrStat(ErrStat2, ' Error in ApplyFFT for point ' // TRIM (Num2LStr(IPoint))// ' .' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
932+ IF (ErrStat >= AbortErrLev) EXIT
933+ END IF
934+
935+ V(:,IPoint,IVec) = Work
936+ ENDDO ! IPoint
955937
938+ ! Clean up memory from FFT_Data.
939+ CALL ExitFFT( FFT_Data, ErrStat2 )
940+ CALL SetErrStat(ErrStat2, ' Error in ExitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
956941
957- ! perform FFT
958-
959- CALL ApplyFFT( Work, FFT_Data, ErrStat2 )
960- IF (ErrStat2 /= ErrID_None ) THEN
961- CALL SetErrStat(ErrStat2, ' Error in ApplyFFT for point ' // TRIM (Num2LStr(IPoint))// ' .' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
962- IF (ErrStat >= AbortErrLev) EXIT
963- END IF
964-
965- V(:,IPoint,IVec) = Work
966-
967- ENDDO ! IPoint
968-
969- ENDDO ! IVec
970-
971- CALL Cleanup()
972-
973- RETURN
974- CONTAINS
975- ! ...........................................
976- SUBROUTINE Cleanup ()
977-
978- CALL ExitFFT( FFT_Data, ErrStat2 )
979- CALL SetErrStat(ErrStat2, ' Error in ExitFFT' , ErrStat, ErrMsg, ' Coeffs2TimeSeries' )
942+ ! skip further OMP loops if any sequential (or if OMP not used).
943+ ! NOTE: OMP doesn't allow return inside OMP thread
944+ if (ErrStat2 >= AbortErrLev) ExitOMPlooping = .true.
945+ endif
946+ ENDDO ! IVec
980947
981- if (allocated (work)) deallocate (work)
982-
983- END SUBROUTINE Cleanup
984948END SUBROUTINE Coeffs2TimeSeries
985949! =======================================================================
986950! > This routine calculates the two-sided Fourier amplitudes of the frequencies
@@ -2030,28 +1994,28 @@ SUBROUTINE AddMeanAndRotate(p, V, U, HWindDir, VWindDir)
20301994 REAL (ReKi) :: v3(3 ) ! temporary 3-component array containing velocity
20311995 INTEGER (IntKi) :: ITime ! loop counter for time step
20321996 INTEGER (IntKi) :: IPoint ! loop counter for grid points
2033-
2034-
2035-
2036-
1997+
20371998 ! ..............................................................................
2038- ! Add mean wind to u' components and rotate to inertial reference
1999+ ! Add mean wind to u' components and rotate to inertial reference
20392000 ! frame coordinate system
2040- ! ..............................................................................
2001+ ! ..............................................................................
2002+
2003+ ! $OMP PARALLEL DO &
2004+ ! $OMP COLLAPSE(2) &
2005+ ! $OMP DEFAULT(None) &
2006+ ! $OMP PRIVATE(v3) &
2007+ ! $OMP SHARED(p, U, V, HWindDir, VWindDir)
20412008 DO IPoint= 1 ,p% grid% Npoints
20422009 DO ITime= 1 ,p% grid% NumSteps
20432010
2044- ! Add mean wind speed to the streamwise component and
2045- ! Rotate the wind to the X-Y-Z (inertial) reference frame coordinates:
2046-
2011+ ! Add mean wind speed to the streamwise component and
2012+ ! Rotate the wind to the X-Y-Z (inertial) reference frame coordinates:
20472013 v3 = V(ITime,IPoint,:)
20482014 CALL CalculateWindComponents( v3, U(IPoint), HWindDir(IPoint), VWindDir(IPoint), V(ITime,IPoint,:) )
20492015
20502016 ENDDO ! ITime
2051-
20522017 ENDDO ! IPoint
2053-
2054-
2018+
20552019END SUBROUTINE AddMeanAndRotate
20562020! =======================================================================
20572021SUBROUTINE TS_ValidateInput (P , ErrStat , ErrMsg )
0 commit comments